Search in sources:

Example 1 with TokenPartitioner

use of org.apache.cassandra.spark.data.partitioner.TokenPartitioner in project spark-cassandra-bulkreader by jberragan.

The class KryoSerializationTests defines the method testTokenPartitioner.

@Test
public void testTokenPartitioner() {
    // Property-based check: a TokenPartitioner must survive a Kryo round trip intact
    // for every partitioner type and a spread of instance counts / parallelism / core counts.
    qt().forAll(TestUtils.partitioners(),
                arbitrary().pick(Arrays.asList(3, 16, 128)),
                arbitrary().pick(Arrays.asList(1, 4, 16)),
                arbitrary().pick(Arrays.asList(4, 16, 64)))
        .checkAssert((partitioner, numInstances, defaultParallelism, numCores) -> {
            final CassandraRing ring = TestUtils.createRing(partitioner, numInstances);
            final TokenPartitioner original = new TokenPartitioner(ring, defaultParallelism, numCores);
            // Serialize with Kryo, then deserialize and compare field by field.
            final Output serialized = KryoSerializationTests.serialize(original);
            final TokenPartitioner roundTripped = KryoSerializationTests.deserialize(serialized, TokenPartitioner.class);
            assertNotNull(roundTripped);
            assertEquals(original.numPartitions(), roundTripped.numPartitions());
            // Sub-ranges must match in both count and content, in order.
            final int numSubRanges = original.subRanges().size();
            assertEquals(numSubRanges, roundTripped.subRanges().size());
            for (int idx = 0; idx < numSubRanges; idx++) {
                assertEquals(original.subRanges().get(idx), roundTripped.subRanges().get(idx));
            }
            assertEquals(original.ring(), roundTripped.ring());
        });
}
Also used : CassandraRing(org.apache.cassandra.spark.data.partitioner.CassandraRing) Output(com.esotericsoftware.kryo.io.Output) TokenPartitioner(org.apache.cassandra.spark.data.partitioner.TokenPartitioner) Test(org.junit.Test)

Example 2 with TokenPartitioner

use of org.apache.cassandra.spark.data.partitioner.TokenPartitioner in project spark-cassandra-bulkreader by jberragan.

The class PartitionKeyFilterTests defines the method testTokenRing.

@Test
public void testTokenRing() {
    // For every partitioner and a range of cluster sizes, verify that any token lying
    // exactly on (or inside) a Spark partition boundary overlaps exactly one sub-range.
    qt().forAll(TestUtils.partitioners(), arbitrary().pick(Arrays.asList(1, 3, 6, 12, 128)))
        .checkAssert((partitioner, numInstances) -> {
            final CassandraRing ring = TestUtils.createRing(partitioner, numInstances);
            final TokenPartitioner tokenPartitioner = new TokenPartitioner(ring, 24, 24);
            // Gather the lower endpoint, mid-point and upper endpoint of every partition's token range.
            final List<BigInteger> boundaryTokens = IntStream
                    .range(0, tokenPartitioner.numPartitions())
                    .mapToObj(tokenPartitioner::getTokenRange)
                    .flatMap(r -> Stream.of(r.lowerEndpoint(), midPoint(r), r.upperEndpoint()))
                    .collect(Collectors.toList());
            for (final BigInteger token : boundaryTokens) {
                // check boundary tokens only match 1 Spark token range
                final PartitionKeyFilter filter = PartitionKeyFilter.create(Int32Type.instance.fromString("11"), token);
                assertEquals(1, tokenPartitioner.subRanges().stream().filter(filter::overlaps).count());
            }
        });
}
Also used : CassandraRing(org.apache.cassandra.spark.data.partitioner.CassandraRing) BigInteger(java.math.BigInteger) TokenPartitioner(org.apache.cassandra.spark.data.partitioner.TokenPartitioner) PartitionKeyFilter(org.apache.cassandra.spark.sparksql.filters.PartitionKeyFilter) Test(org.junit.Test)

Example 3 with TokenPartitioner

use of org.apache.cassandra.spark.data.partitioner.TokenPartitioner in project spark-cassandra-bulkreader by jberragan.

The class PartitionedDataLayerTests defines the method runSplitTests.

@SuppressWarnings("UnstableApiUsage")
private static void runSplitTests(final int minReplicas, final PartitionedDataLayer.AvailabilityHint... availabilityHint) {
    // One availability hint is supplied per instance in the ring.
    final int numInstances = availabilityHint.length;
    TestUtils.runTest((partitioner, dir, bridge) -> {
        final CassandraRing ring = TestUtils.createRing(partitioner, numInstances);
        final List<CassandraInstance> instances = new ArrayList<>(ring.instances());
        instances.sort(Comparator.comparing(CassandraInstance::nodeName));
        final TokenPartitioner tokenPartitioner = new TokenPartitioner(ring, 1, 32);
        // Associate each instance (ordered by node name) with its availability hint.
        final Map<CassandraInstance, PartitionedDataLayer.AvailabilityHint> availableMap = new HashMap<>(numInstances);
        for (int idx = 0; idx < numInstances; idx++) {
            availableMap.put(instances.get(idx), availabilityHint[idx]);
        }
        final Map<Range<BigInteger>, List<CassandraInstance>> ranges =
                ring.getSubRanges(tokenPartitioner.getTokenRange(0)).asMapOfRanges();
        final PartitionedDataLayer.ReplicaSet replicaSet =
                PartitionedDataLayer.splitReplicas(instances, ranges, availableMap::get, minReplicas, 0);
        // Exactly minReplicas instances become primary; the rest become backups.
        assertEquals(minReplicas, replicaSet.primary().size());
        assertEquals(numInstances - minReplicas, replicaSet.backup().size());
        // When ordered by availability hint, the first minReplicas instances should be
        // the primaries and everything after them the backups.
        final List<CassandraInstance> byAvailability = new ArrayList<>(instances);
        byAvailability.sort(Comparator.comparing(availableMap::get));
        int position = 0;
        for (final CassandraInstance instance : byAvailability) {
            if (position < minReplicas) {
                assertTrue(replicaSet.primary().contains(instance));
            } else {
                assertTrue(replicaSet.backup().contains(instance));
            }
            position++;
        }
    });
}
Also used : CassandraRing(org.apache.cassandra.spark.data.partitioner.CassandraRing) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) Range(com.google.common.collect.Range) CassandraInstance(org.apache.cassandra.spark.data.partitioner.CassandraInstance) List(java.util.List) ArrayList(java.util.ArrayList) TokenPartitioner(org.apache.cassandra.spark.data.partitioner.TokenPartitioner)

Example 4 with TokenPartitioner

use of org.apache.cassandra.spark.data.partitioner.TokenPartitioner in project spark-cassandra-bulkreader by jberragan.

The class PartitionedDataLayer defines the method sstables.

@Override
public SSTablesSupplier sstables(final List<CustomFilter> filters) {
    // Build the SSTablesSupplier for the current Spark partition: resolve the partition's
    // token range, find overlapping replicas, then split them into primary/backup sets
    // sized to satisfy the configured consistency level.
    // get token range for Spark partition
    final TokenPartitioner tokenPartitioner = tokenPartitioner();
    final int partitionId = TaskContext.getPartitionId();
    if (partitionId < 0 || partitionId >= tokenPartitioner.numPartitions()) {
        throw new IllegalStateException("PartitionId outside expected range: " + partitionId);
    }
    // get all replicas overlapping partition token range
    final Range<BigInteger> range = tokenPartitioner.getTokenRange(partitionId);
    final CassandraRing ring = ring();
    final ReplicationFactor rf = ring.replicationFactor();
    validateReplicationFactor(rf);
    final Map<Range<BigInteger>, List<CassandraInstance>> instRanges;
    // reuse the local 'ring' instead of invoking ring() a second time
    final Map<Range<BigInteger>, List<CassandraInstance>> subRanges = ring.getSubRanges(range).asMapOfRanges();
    if (filters.stream().noneMatch(CustomFilter::canFilterByKey)) {
        // no key-based filters: every sub-range is relevant
        instRanges = subRanges;
    } else {
        // keep only sub-ranges that overlap at least one key-capable filter;
        // iterate entries directly to avoid a redundant map lookup per key
        instRanges = new HashMap<>();
        subRanges.forEach((instRange, rangeReplicas) -> {
            if (filters.stream().filter(CustomFilter::canFilterByKey).anyMatch(filter -> filter.overlaps(instRange))) {
                instRanges.putIfAbsent(instRange, rangeReplicas);
            }
        });
    }
    final Set<CassandraInstance> replicas = PartitionedDataLayer.rangesToReplicas(consistencyLevel, dc, instRanges);
    LOGGER.info("Creating partitioned SSTablesSupplier for Spark partition partitionId={} rangeLower={} rangeUpper={} numReplicas={}", partitionId, range.lowerEndpoint(), range.upperEndpoint(), replicas.size());
    // use consistency level and replication factor to calculate min number of replicas required to satisfy consistency level
    // split replicas into 'primary' and 'backup' replicas, attempt on primary replicas and use backups to retry in the event of a failure
    final int minReplicas = consistencyLevel.blockFor(rf, dc);
    final ReplicaSet replicaSet = PartitionedDataLayer.splitReplicas(consistencyLevel, dc, instRanges, replicas, this::getAvailability, minReplicas, partitionId);
    if (replicaSet.primary().size() < minReplicas) {
        // could not find enough primary replicas to meet consistency level
        // use the backup() accessor for consistency with the rest of this method
        assert replicaSet.backup().isEmpty();
        throw new NotEnoughReplicasException(consistencyLevel, range, minReplicas, replicas.size(), dc);
    }
    final ExecutorService executor = executorService();
    final Stats stats = stats();
    // primaries are attempted first; backups always re-read repaired data (isRepairPrimary = true)
    final Set<SingleReplica> primaryReplicas = replicaSet.primary().stream().map(inst -> new SingleReplica(inst, this, range, partitionId, executor, stats, replicaSet.isRepairPrimary(inst))).collect(Collectors.toSet());
    final Set<SingleReplica> backupReplicas = replicaSet.backup().stream().map(inst -> new SingleReplica(inst, this, range, partitionId, executor, stats, true)).collect(Collectors.toSet());
    return new MultipleReplicas(primaryReplicas, backupReplicas, stats);
}
Also used : NotImplementedException(org.apache.commons.lang3.NotImplementedException) ConsistencyLevel(org.apache.cassandra.spark.data.partitioner.ConsistencyLevel) CustomFilter(org.apache.cassandra.spark.sparksql.filters.CustomFilter) LoggerFactory(org.slf4j.LoggerFactory) HashCodeBuilder(org.apache.commons.lang.builder.HashCodeBuilder) HashMap(java.util.HashMap) CompletableFuture(java.util.concurrent.CompletableFuture) CassandraInstance(org.apache.cassandra.spark.data.partitioner.CassandraInstance) Function(java.util.function.Function) ByteBuffer(java.nio.ByteBuffer) HashSet(java.util.HashSet) Stats(org.apache.cassandra.spark.stats.Stats) Map(java.util.Map) CassandraRing(org.apache.cassandra.spark.data.partitioner.CassandraRing) BigInteger(java.math.BigInteger) NoMatchFoundException(org.apache.cassandra.spark.sparksql.NoMatchFoundException) ExecutorService(java.util.concurrent.ExecutorService) SparkRangeFilter(org.apache.cassandra.spark.sparksql.filters.SparkRangeFilter) Logger(org.slf4j.Logger) TaskContext(org.apache.spark.TaskContext) Collection(java.util.Collection) Range(com.google.common.collect.Range) Set(java.util.Set) MultipleReplicas(org.apache.cassandra.spark.data.partitioner.MultipleReplicas) SingleReplica(org.apache.cassandra.spark.data.partitioner.SingleReplica) Collectors(java.util.stream.Collectors) TokenPartitioner(org.apache.cassandra.spark.data.partitioner.TokenPartitioner) NotEnoughReplicasException(org.apache.cassandra.spark.data.partitioner.NotEnoughReplicasException) Nullable(org.jetbrains.annotations.Nullable) List(java.util.List) Stream(java.util.stream.Stream) Partitioner(org.apache.cassandra.spark.data.partitioner.Partitioner) EqualsBuilder(org.apache.commons.lang.builder.EqualsBuilder) Preconditions(com.google.common.base.Preconditions) VisibleForTesting(com.google.common.annotations.VisibleForTesting) Comparator(java.util.Comparator) NotNull(org.jetbrains.annotations.NotNull) 
CassandraRing(org.apache.cassandra.spark.data.partitioner.CassandraRing) SingleReplica(org.apache.cassandra.spark.data.partitioner.SingleReplica) Range(com.google.common.collect.Range) CassandraInstance(org.apache.cassandra.spark.data.partitioner.CassandraInstance) CustomFilter(org.apache.cassandra.spark.sparksql.filters.CustomFilter) NotEnoughReplicasException(org.apache.cassandra.spark.data.partitioner.NotEnoughReplicasException) ExecutorService(java.util.concurrent.ExecutorService) Stats(org.apache.cassandra.spark.stats.Stats) BigInteger(java.math.BigInteger) List(java.util.List) TokenPartitioner(org.apache.cassandra.spark.data.partitioner.TokenPartitioner) MultipleReplicas(org.apache.cassandra.spark.data.partitioner.MultipleReplicas)

Example 5 with TokenPartitioner

use of org.apache.cassandra.spark.data.partitioner.TokenPartitioner in project spark-cassandra-bulkreader by jberragan.

The class PartitionedDataLayerTests defines the method testSplitReplicas.

@SuppressWarnings("UnstableApiUsage")
private static void testSplitReplicas(final CassandraRing ring, final ConsistencyLevel consistencyLevel, final int defaultParallelism, final int numCores, final ReplicationFactor rf, final String dc) {
    // For every Spark partition: splitting replicas must produce disjoint primary/backup
    // sets that together cover all replicas overlapping the partition's token range.
    final TokenPartitioner tokenPartitioner = new TokenPartitioner(ring, defaultParallelism, numCores);
    final int numPartitions = tokenPartitioner.numPartitions();
    // All instances are reported as UP so availability never excludes a replica.
    final Function<CassandraInstance, PartitionedDataLayer.AvailabilityHint> allUp = inst -> UP;
    for (int partition = 0; partition < numPartitions; partition++) {
        final Range<BigInteger> tokenRange = tokenPartitioner.getTokenRange(partition);
        final Map<Range<BigInteger>, List<CassandraInstance>> subRanges = ring.getSubRanges(tokenRange).asMapOfRanges();
        final Set<CassandraInstance> replicas = PartitionedDataLayer.rangesToReplicas(consistencyLevel, dc, subRanges);
        final int minReplicas = consistencyLevel.blockFor(rf, dc);
        final PartitionedDataLayer.ReplicaSet replicaSet =
                PartitionedDataLayer.splitReplicas(consistencyLevel, dc, subRanges, replicas, allUp, minReplicas, 0);
        assertNotNull(replicaSet);
        // No instance may appear in both sets, and together they must account for every replica.
        assertTrue(Collections.disjoint(replicaSet.primary(), replicaSet.backup()));
        assertEquals(replicas.size(), replicaSet.primary().size() + replicaSet.backup().size());
    }
}
Also used : CassandraBridge(org.apache.cassandra.spark.reader.CassandraBridge) EmptyScanner(org.apache.cassandra.spark.reader.EmptyScanner) Arrays(java.util.Arrays) DOWN(org.apache.cassandra.spark.data.PartitionedDataLayer.AvailabilityHint.DOWN) ANY(org.apache.cassandra.spark.data.partitioner.ConsistencyLevel.ANY) CustomFilter(org.apache.cassandra.spark.sparksql.filters.CustomFilter) IStreamScanner(org.apache.cassandra.spark.reader.IStreamScanner) ONE(org.apache.cassandra.spark.data.partitioner.ConsistencyLevel.ONE) ByteBuffer(java.nio.ByteBuffer) UP(org.apache.cassandra.spark.data.PartitionedDataLayer.AvailabilityHint.UP) PartitionKeyFilter(org.apache.cassandra.spark.sparksql.filters.PartitionKeyFilter) CALLS_REAL_METHODS(org.mockito.Mockito.CALLS_REAL_METHODS) TestUtils(org.apache.cassandra.spark.TestUtils) Map(java.util.Map) BigInteger(java.math.BigInteger) EACH_QUORUM(org.apache.cassandra.spark.data.partitioner.ConsistencyLevel.EACH_QUORUM) ImmutableMap(com.google.common.collect.ImmutableMap) TWO(org.apache.cassandra.spark.data.partitioner.ConsistencyLevel.TWO) Range(com.google.common.collect.Range) Set(java.util.Set) QuickTheory.qt(org.quicktheories.QuickTheory.qt) Matchers.any(org.mockito.Matchers.any) NotEnoughReplicasException(org.apache.cassandra.spark.data.partitioner.NotEnoughReplicasException) List(java.util.List) MultipleReplicasTests(org.apache.cassandra.spark.data.partitioner.MultipleReplicasTests) Assert.assertFalse(org.junit.Assert.assertFalse) JDKSerializationTests(org.apache.cassandra.spark.data.partitioner.JDKSerializationTests) Mockito.mock(org.mockito.Mockito.mock) RandomUtils(org.apache.commons.lang3.RandomUtils) ConsistencyLevel(org.apache.cassandra.spark.data.partitioner.ConsistencyLevel) HashMap(java.util.HashMap) LOCAL_QUORUM(org.apache.cassandra.spark.data.partitioner.ConsistencyLevel.LOCAL_QUORUM) CassandraInstance(org.apache.cassandra.spark.data.partitioner.CassandraInstance) Function(java.util.function.Function) 
ArrayList(java.util.ArrayList) UNKNOWN(org.apache.cassandra.spark.data.PartitionedDataLayer.AvailabilityHint.UNKNOWN) ALL(org.apache.cassandra.spark.data.partitioner.ConsistencyLevel.ALL) Generate.pick(org.quicktheories.generators.Generate.pick) CassandraRing(org.apache.cassandra.spark.data.partitioner.CassandraRing) TaskContext(org.apache.spark.TaskContext) Assert.assertNotNull(org.junit.Assert.assertNotNull) Assert.assertTrue(org.junit.Assert.assertTrue) Test(org.junit.Test) Mockito.when(org.mockito.Mockito.when) TokenPartitioner(org.apache.cassandra.spark.data.partitioner.TokenPartitioner) Partitioner(org.apache.cassandra.spark.data.partitioner.Partitioner) Comparator(java.util.Comparator) Collections(java.util.Collections) TestSchema(org.apache.cassandra.spark.TestSchema) Assert.assertEquals(org.junit.Assert.assertEquals) Range(com.google.common.collect.Range) CassandraInstance(org.apache.cassandra.spark.data.partitioner.CassandraInstance) BigInteger(java.math.BigInteger) List(java.util.List) ArrayList(java.util.ArrayList) TokenPartitioner(org.apache.cassandra.spark.data.partitioner.TokenPartitioner)

Aggregations

TokenPartitioner (org.apache.cassandra.spark.data.partitioner.TokenPartitioner)8 CassandraRing (org.apache.cassandra.spark.data.partitioner.CassandraRing)7 Range (com.google.common.collect.Range)6 BigInteger (java.math.BigInteger)6 ByteBuffer (java.nio.ByteBuffer)4 HashMap (java.util.HashMap)4 List (java.util.List)4 CassandraInstance (org.apache.cassandra.spark.data.partitioner.CassandraInstance)4 CustomFilter (org.apache.cassandra.spark.sparksql.filters.CustomFilter)4 Test (org.junit.Test)4 Comparator (java.util.Comparator)3 Map (java.util.Map)3 Set (java.util.Set)3 Function (java.util.function.Function)3 ConsistencyLevel (org.apache.cassandra.spark.data.partitioner.ConsistencyLevel)3 NotEnoughReplicasException (org.apache.cassandra.spark.data.partitioner.NotEnoughReplicasException)3 Partitioner (org.apache.cassandra.spark.data.partitioner.Partitioner)3 PartitionKeyFilter (org.apache.cassandra.spark.sparksql.filters.PartitionKeyFilter)3 Stats (org.apache.cassandra.spark.stats.Stats)3 TaskContext (org.apache.spark.TaskContext)3