Use of org.apache.cassandra.spark.sparksql.SparkRowIterator in the project spark-cassandra-bulkreader by jberragan.
From the testSkipPartitionsCompactionScanner method of the SSTableReaderTests class.
@Test
public void testSkipPartitionsCompactionScanner() {
    runTest((partitioner, dir, bridge) -> {
        // write an SSTable with NUM_ROWS partitions of NUM_COLS rows each, where value = i + j
        final TestSchema schema = TestSchema.basic(bridge);
        TestUtils.writeSSTable(bridge, dir, partitioner, schema, (writer) -> {
            for (int i = 0; i < NUM_ROWS; i++) {
                for (int j = 0; j < NUM_COLS; j++) {
                    writer.write(i, j, i + j);
                }
            }
        });
        assertEquals(1, countSSTables(dir));
        final Path dataFile = getFirstFileType(dir, DataLayer.FileType.DATA);
        final TableMetadata metaData = schema.schemaBuilder(partitioner).tableMetaData();

        // stub the data layer so it hands back the pre-opened reader built below
        final Set<SparkSSTableReader> readers = new HashSet<>(1);
        final TestDataLayer dataLayer = new TestDataLayer(bridge, Collections.singletonList(dataFile), schema.buildSchema()) {
            @Override
            public SSTablesSupplier sstables(final List<CustomFilter> filters) {
                return new SSTablesSupplier() {
                    @Override
                    @SuppressWarnings("unchecked")
                    public <T extends SparkSSTableReader> Set<T> openAll(ReaderOpener<T> readerOpener) {
                        return (Set<T>) readers;
                    }
                };
            }
        };

        // restrict the Spark token range to a strict sub-range of the partitioner's
        // token space, so some partitions fall outside it and must be skipped
        final Range<BigInteger> sparkTokenRange;
        switch (partitioner) {
            case Murmur3Partitioner:
                sparkTokenRange = Range.closed(BigInteger.valueOf(-9223372036854775808L), BigInteger.valueOf(3074457345618258602L));
                break;
            case RandomPartitioner:
                sparkTokenRange = Range.closed(BigInteger.ZERO, new BigInteger("916176208424801638531839357843455255"));
                break;
            default:
                throw new RuntimeException("Unexpected partitioner: " + partitioner);
        }
        final SparkRangeFilter rangeFilter = SparkRangeFilter.create(sparkTokenRange);

        // count skipped partitions via the Stats hook, and fail if an in-range partition is ever skipped
        final AtomicBoolean pass = new AtomicBoolean(true);
        final AtomicInteger skipCount = new AtomicInteger(0);
        final Stats stats = new Stats() {
            @Override
            public void skippedPartition(ByteBuffer key, BigInteger token) {
                LOGGER.info("Skipping partition: " + token);
                skipCount.incrementAndGet();
                if (sparkTokenRange.contains(token)) {
                    LOGGER.info("Should not skip partition: " + token);
                    pass.set(false);
                }
            }
        };

        final FourZeroSSTableReader reader = openReader(metaData,
                                                        dataLayer.listSSTables().findFirst().orElseThrow(() -> new RuntimeException("Could not find SSTable")),
                                                        Collections.singletonList(rangeFilter),
                                                        false,
                                                        stats);
        readers.add(reader);

        // read the SSTable end-to-end using SparkRowIterator and verify it skips the required partitions
        // and that all the partitions returned are within the Spark token range
        final SparkRowIterator it = new SparkRowIterator(dataLayer);
        int count = 0;
        while (it.next()) {
            final InternalRow row = it.get();
            assertEquals(row.getInt(2), row.getInt(0) + row.getInt(1));
            final DecoratedKey key = FourZero.getPartitioner(partitioner)
                                             .decorateKey((ByteBuffer) ByteBuffer.allocate(4).putInt(row.getInt(0)).flip());
            final BigInteger token = FourZeroUtils.tokenToBigInteger(key.getToken());
            assertTrue(sparkTokenRange.contains(token));
            count++;
        }
        assertTrue(skipCount.get() > 0); // at least some out-of-range partitions must have been skipped
        // each skipped partition removes exactly NUM_COLS rows from the expected total
        assertEquals((NUM_ROWS - skipCount.get()) * NUM_COLS, count);
        assertTrue(pass.get());
    });
}
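The heart of the test is the containment check between a partition's token and the Spark task's token range. Below is a minimal, self-contained sketch of what a SparkRangeFilter-style check conceptually does. The murmur3Token helper is hypothetical: it uses Guava's murmur3_128 as a stand-in for Cassandra's own Murmur3 implementation (which additionally maps Long.MIN_VALUE to Long.MAX_VALUE), so it only approximates the real token computation done by FourZeroUtils.tokenToBigInteger.

import java.math.BigInteger;
import java.nio.ByteBuffer;
import com.google.common.collect.Range;
import com.google.common.hash.Hashing;

public class TokenRangeCheckSketch {
    // Hypothetical helper: derive a Murmur3-style token from a partition key.
    // Guava's murmur3_128 is a stand-in for Cassandra's MurmurHash; Cassandra
    // also normalizes Long.MIN_VALUE to Long.MAX_VALUE, mirrored here.
    static BigInteger murmur3Token(ByteBuffer key) {
        long token = Hashing.murmur3_128().hashBytes(key.duplicate()).asLong();
        return BigInteger.valueOf(token == Long.MIN_VALUE ? Long.MAX_VALUE : token);
    }

    public static void main(String[] args) {
        // the same Murmur3Partitioner sub-range the test assigns to the Spark task
        Range<BigInteger> sparkTokenRange = Range.closed(
                BigInteger.valueOf(Long.MIN_VALUE),
                BigInteger.valueOf(3074457345618258602L));

        ByteBuffer key = (ByteBuffer) ByteBuffer.allocate(4).putInt(42).flip();
        BigInteger token = murmur3Token(key);

        // a partition is read only if its token falls inside the task's range;
        // otherwise the compaction scanner skips it and Stats.skippedPartition fires
        System.out.println(sparkTokenRange.contains(token)
                           ? "read partition, token=" + token
                           : "skip partition, token=" + token);
    }
}

The test asserts this behavior from both sides: every row returned by SparkRowIterator decorates back to a token inside the range, and the skip count accounts for exactly the rows that went missing.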
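The assertion row.getInt(2) == row.getInt(0) + row.getInt(1) holds because the writer loop and the key decoration above suggest TestSchema.basic lays the columns out in order: partition key i, clustering key j, then the value i + j. A tiny sketch of the same check against a hand-built Spark InternalRow, with no bulk-reader classes involved:

import org.apache.spark.sql.catalyst.InternalRow;
import org.apache.spark.sql.catalyst.expressions.GenericInternalRow;

public class RowLayoutSketch {
    public static void main(String[] args) {
        int i = 7;
        int j = 11;
        // column order assumed from the test: partition key, clustering key, value
        InternalRow row = new GenericInternalRow(new Object[]{ i, j, i + j });
        if (row.getInt(2) != row.getInt(0) + row.getInt(1)) {
            throw new AssertionError("value column should equal i + j");
        }
        System.out.println("row layout matches: value = " + row.getInt(2));
    }
}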