Use of org.apache.cassandra.spark.shaded.fourzero.cassandra.db.DecoratedKey in project spark-cassandra-bulkreader by jberragan.
Class SSTableReaderTests, method testSSTableRange.
@Test
public void testSSTableRange() {
    runTest((partitioner, dir, bridge) -> {
        // write an SSTable
        final TestSchema schema = TestSchema.basic(bridge);
        TestUtils.writeSSTable(bridge, dir, partitioner, schema, (writer) -> {
            for (int i = 0; i < 10; i++) {
                for (int j = 0; j < 1; j++) {
                    writer.write(i, j, i + j);
                }
            }
        });
        assertEquals(1, countSSTables(dir));
        final Path dataFile = getFirstFileType(dir, DataLayer.FileType.DATA);
        final TableMetadata metaData = schema.schemaBuilder(partitioner).tableMetaData();
        final TestDataLayer dataLayer = new TestDataLayer(bridge, Collections.singletonList(dataFile));
        final SparkSSTableReader reader = openReader(metaData, dataLayer.listSSTables().findFirst().orElseThrow(() -> new RuntimeException("Could not find SSTable")));
        assertNotNull(reader.firstToken());
        assertNotNull(reader.lastToken());
        // verify primary Index.db file matches first and last
        final Path indexFile = getFirstFileType(dir, DataLayer.FileType.INDEX);
        final Pair<DecoratedKey, DecoratedKey> firstAndLast;
        try (final InputStream is = new BufferedInputStream(new FileInputStream(indexFile.toFile()))) {
            final Pair<ByteBuffer, ByteBuffer> keys = FourZeroUtils.readPrimaryIndex(is, true, Collections.emptyList());
            firstAndLast = Pair.of(FourZero.getPartitioner(partitioner).decorateKey(keys.getLeft()), FourZero.getPartitioner(partitioner).decorateKey(keys.getRight()));
        }
        final BigInteger first = FourZeroUtils.tokenToBigInteger(firstAndLast.getLeft().getToken());
        final BigInteger last = FourZeroUtils.tokenToBigInteger(firstAndLast.getRight().getToken());
        assertEquals(first, reader.firstToken());
        assertEquals(last, reader.lastToken());
        switch (partitioner) {
            case Murmur3Partitioner:
                assertFalse(SparkSSTableReader.overlaps(reader, Range.closed(Partitioner.Murmur3Partitioner.minToken(), Partitioner.Murmur3Partitioner.minToken())));
                assertFalse(SparkSSTableReader.overlaps(reader, Range.closed(BigInteger.valueOf(-8710962479251732708L), BigInteger.valueOf(-7686143364045646507L))));
                assertTrue(SparkSSTableReader.overlaps(reader, Range.closed(BigInteger.valueOf(-7509452495886106294L), BigInteger.valueOf(-7509452495886106293L))));
                assertTrue(SparkSSTableReader.overlaps(reader, Range.closed(BigInteger.valueOf(-7509452495886106293L), BigInteger.valueOf(-7509452495886106293L))));
                assertTrue(SparkSSTableReader.overlaps(reader, Range.closed(BigInteger.valueOf(-7509452495886106293L), BigInteger.valueOf(2562047788015215502L))));
                assertTrue(SparkSSTableReader.overlaps(reader, Range.closed(BigInteger.valueOf(-7509452495886106293L), BigInteger.valueOf(9010454139840013625L))));
                assertTrue(SparkSSTableReader.overlaps(reader, Range.closed(BigInteger.valueOf(9010454139840013625L), BigInteger.valueOf(9010454139840013625L))));
                assertFalse(SparkSSTableReader.overlaps(reader, Range.closed(Partitioner.Murmur3Partitioner.maxToken(), Partitioner.Murmur3Partitioner.maxToken())));
                return;
            case RandomPartitioner:
                assertFalse(SparkSSTableReader.overlaps(reader, Range.closed(Partitioner.RandomPartitioner.minToken(), Partitioner.RandomPartitioner.minToken())));
                assertFalse(SparkSSTableReader.overlaps(reader, Range.closed(BigInteger.valueOf(0L), BigInteger.valueOf(500L))));
                assertFalse(SparkSSTableReader.overlaps(reader, Range.closed(new BigInteger("18837662806270881894834867523173387677"), new BigInteger("18837662806270881894834867523173387677"))));
                assertTrue(SparkSSTableReader.overlaps(reader, Range.closed(new BigInteger("18837662806270881894834867523173387678"), new BigInteger("18837662806270881894834867523173387678"))));
                assertTrue(SparkSSTableReader.overlaps(reader, Range.closed(new BigInteger("18837662806270881894834867523173387679"), new BigInteger("18837662806270881894834867523173387679"))));
                assertTrue(SparkSSTableReader.overlaps(reader, Range.closed(new BigInteger("18837662806270881894834867523173387679"), new BigInteger("137731376325982006772573399291321493164"))));
                assertTrue(SparkSSTableReader.overlaps(reader, Range.closed(new BigInteger("137731376325982006772573399291321493164"), new BigInteger("137731376325982006772573399291321493164"))));
                assertFalse(SparkSSTableReader.overlaps(reader, Range.closed(new BigInteger("137731376325982006772573399291321493165"), new BigInteger("137731376325982006772573399291321493165"))));
                assertFalse(SparkSSTableReader.overlaps(reader, Range.closed(Partitioner.RandomPartitioner.maxToken(), Partitioner.RandomPartitioner.maxToken())));
                return;
            default:
                throw new RuntimeException("Unexpected partitioner: " + partitioner);
        }
    });
}
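The closed-range assertions above all reduce to mapping a partition key onto its BigInteger token and testing that token against the reader's span. A minimal sketch of that mapping, reusing only helpers already shown on this page; the containsKey helper name is a hypothetical illustration, and it assumes an SSTable written with the Murmur3 partitioner:

// Hypothetical helper: map an int partition key to its BigInteger token and check
// whether the reader's token span covers it, mirroring the assertions in the test above.
static boolean containsKey(final SparkSSTableReader reader, final int partitionKey) {
    final ByteBuffer keyBuf = (ByteBuffer) ByteBuffer.allocate(4).putInt(partitionKey).flip();
    final DecoratedKey key = FourZero.getPartitioner(Partitioner.Murmur3Partitioner).decorateKey(keyBuf);
    final BigInteger token = FourZeroUtils.tokenToBigInteger(key.getToken());
    // a single-point closed range overlaps the reader iff the reader covers this token
    return SparkSSTableReader.overlaps(reader, Range.closed(token, token));
}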
Use of org.apache.cassandra.spark.shaded.fourzero.cassandra.db.DecoratedKey in project spark-cassandra-bulkreader by jberragan.
Class SSTableReaderTests, method testSkipPartitionsCompactionScanner.
@Test
public void testSkipPartitionsCompactionScanner() {
    runTest((partitioner, dir, bridge) -> {
        // write an SSTable
        final TestSchema schema = TestSchema.basic(bridge);
        TestUtils.writeSSTable(bridge, dir, partitioner, schema, (writer) -> {
            for (int i = 0; i < NUM_ROWS; i++) {
                for (int j = 0; j < NUM_COLS; j++) {
                    writer.write(i, j, i + j);
                }
            }
        });
        assertEquals(1, countSSTables(dir));
        final Path dataFile = getFirstFileType(dir, DataLayer.FileType.DATA);
        final TableMetadata metaData = schema.schemaBuilder(partitioner).tableMetaData();
        final Set<SparkSSTableReader> readers = new HashSet<>(1);
        final TestDataLayer dataLayer = new TestDataLayer(bridge, Collections.singletonList(dataFile), schema.buildSchema()) {
            public SSTablesSupplier sstables(final List<CustomFilter> filters) {
                return new SSTablesSupplier() {
                    public <T extends SparkSSTableReader> Set<T> openAll(ReaderOpener<T> readerOpener) {
                        return (Set<T>) readers;
                    }
                };
            }
        };
        final Range<BigInteger> sparkTokenRange;
        switch (partitioner) {
            case Murmur3Partitioner:
                sparkTokenRange = Range.closed(BigInteger.valueOf(-9223372036854775808L), BigInteger.valueOf(3074457345618258602L));
                break;
            case RandomPartitioner:
                sparkTokenRange = Range.closed(BigInteger.ZERO, new BigInteger("916176208424801638531839357843455255"));
                break;
            default:
                throw new RuntimeException("Unexpected partitioner: " + partitioner);
        }
        final SparkRangeFilter rangeFilter = SparkRangeFilter.create(sparkTokenRange);
        final AtomicBoolean pass = new AtomicBoolean(true);
        final AtomicInteger skipCount = new AtomicInteger(0);
        final Stats stats = new Stats() {
            @Override
            public void skippedPartition(ByteBuffer key, BigInteger token) {
                LOGGER.info("Skipping partition: " + token);
                skipCount.incrementAndGet();
                if (sparkTokenRange.contains(token)) {
                    LOGGER.info("Should not skip partition: " + token);
                    pass.set(false);
                }
            }
        };
        final FourZeroSSTableReader reader = openReader(metaData, dataLayer.listSSTables().findFirst().orElseThrow(() -> new RuntimeException("Could not find SSTable")), Collections.singletonList(rangeFilter), false, stats);
        readers.add(reader);
        // read the SSTable end-to-end using SparkRowIterator and verify it skips the required partitions
        // and all the partitions returned are within the Spark token range.
        final SparkRowIterator it = new SparkRowIterator(dataLayer);
        int count = 0;
        while (it.next()) {
            final InternalRow row = it.get();
            assertEquals(row.getInt(2), row.getInt(0) + row.getInt(1));
            final DecoratedKey key = FourZero.getPartitioner(partitioner).decorateKey((ByteBuffer) ByteBuffer.allocate(4).putInt(row.getInt(0)).flip());
            final BigInteger token = FourZeroUtils.tokenToBigInteger(key.getToken());
            assertTrue(sparkTokenRange.contains(token));
            count++;
        }
        assertTrue(skipCount.get() > 0);
        // should skip out of range partitions here
        assertEquals((NUM_ROWS - skipCount.get()) * NUM_COLS, count);
        assertTrue(pass.get());
    });
}
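The skip accounting in this test hinges on Stats.skippedPartition being invoked only for partitions whose token falls outside the Spark range handed to SparkRangeFilter. A minimal wiring sketch under that assumption; the range bounds and the skipped counter below are illustrative only, while Stats and SparkRangeFilter are the same classes used in the test:

// Illustrative bounds: only tokens inside [-100, 100] should be emitted by the reader;
// everything else is expected to trigger the skippedPartition callback.
final Range<BigInteger> range = Range.closed(BigInteger.valueOf(-100L), BigInteger.valueOf(100L));
final SparkRangeFilter filter = SparkRangeFilter.create(range);
final AtomicInteger skipped = new AtomicInteger(0);
final Stats stats = new Stats() {
    @Override
    public void skippedPartition(ByteBuffer key, BigInteger token) {
        // a skipped partition's token should never lie inside the Spark range
        assertFalse(range.contains(token));
        skipped.incrementAndGet();
    }
};
// filter and stats would then be passed to openReader(...) as in the test above.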
Use of org.apache.cassandra.spark.shaded.fourzero.cassandra.db.DecoratedKey in project spark-cassandra-bulkreader by jberragan.
Class FourZeroUtilsTests, method testReadFirstLastPartitionKey.
@Test
public void testReadFirstLastPartitionKey() {
    runTest((partitioner, dir, bridge) -> {
        // write an SSTable
        final TestSchema schema = TestSchema.basic(bridge);
        TestUtils.writeSSTable(bridge, dir, partitioner, schema, (writer) -> {
            for (int i = 0; i < NUM_ROWS; i++) {
                for (int j = 0; j < NUM_COLS; j++) {
                    writer.write(i, j, i + j);
                }
            }
        });
        assertEquals(1, countSSTables(dir));
        // read the Summary.db file for the first and last partition keys
        final Path summaryFile = getFirstFileType(dir, DataLayer.FileType.SUMMARY);
        final SummaryDbUtils.Summary summaryKeys;
        try (final InputStream in = new BufferedInputStream(Files.newInputStream(summaryFile))) {
            summaryKeys = SummaryDbUtils.readSummary(in, Murmur3Partitioner.instance, 128, 2048);
        }
        assertNotNull(summaryKeys);
        assertNotNull(summaryKeys.first());
        assertNotNull(summaryKeys.last());
        // read the primary Index.db file for the first and last partition keys
        final Path indexFile = getFirstFileType(dir, DataLayer.FileType.INDEX);
        final Pair<DecoratedKey, DecoratedKey> indexKeys;
        try (final InputStream in = new BufferedInputStream(Files.newInputStream(indexFile))) {
            final Pair<ByteBuffer, ByteBuffer> keys = FourZeroUtils.readPrimaryIndex(in, true, Collections.emptyList());
            indexKeys = Pair.of(Murmur3Partitioner.instance.decorateKey(keys.getLeft()), Murmur3Partitioner.instance.decorateKey(keys.getRight()));
        }
        assertNotNull(indexKeys);
        assertEquals(indexKeys.getLeft(), summaryKeys.first());
        assertEquals(indexKeys.getRight(), summaryKeys.last());
    });
}
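The assertEquals calls above compare DecoratedKey objects directly, which works because decorated keys compare by their underlying partition key. An equivalent, slightly more explicit check compares the derived tokens; this sketch assumes the summaryKeys and indexKeys variables from the test above:

// Equivalent check by token: Summary.db and Index.db should agree on first/last partitions.
final BigInteger summaryFirstToken = FourZeroUtils.tokenToBigInteger(summaryKeys.first().getToken());
final BigInteger indexFirstToken = FourZeroUtils.tokenToBigInteger(indexKeys.getLeft().getToken());
assertEquals(summaryFirstToken, indexFirstToken);
final BigInteger summaryLastToken = FourZeroUtils.tokenToBigInteger(summaryKeys.last().getToken());
final BigInteger indexLastToken = FourZeroUtils.tokenToBigInteger(indexKeys.getRight().getToken());
assertEquals(summaryLastToken, indexLastToken);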
Use of org.apache.cassandra.spark.shaded.fourzero.cassandra.db.DecoratedKey in project spark-cassandra-bulkreader by jberragan.
Class SummaryDbUtils, method readSummary.
/**
 * Read and deserialize the Summary.db file.
 *
 * @param summaryStream    input stream over the Summary.db file
 * @param partitioner      token partitioner
 * @param minIndexInterval minimum index interval
 * @param maxIndexInterval maximum index interval
 * @return Summary object wrapping the index summary and the first/last partition keys, or null if the stream is null
 * @throws IOException if an I/O error occurs while reading the stream
 */
static Summary readSummary(final InputStream summaryStream, final IPartitioner partitioner, final int minIndexInterval, final int maxIndexInterval) throws IOException {
    if (summaryStream == null) {
        return null;
    }
    try (final DataInputStream is = new DataInputStream(summaryStream)) {
        final IndexSummary indexSummary = IndexSummary.serializer.deserialize(is, partitioner, minIndexInterval, maxIndexInterval);
        final DecoratedKey firstKey = partitioner.decorateKey(ByteBufferUtil.readWithLength(is));
        final DecoratedKey lastKey = partitioner.decorateKey(ByteBufferUtil.readWithLength(is));
        return new Summary(indexSummary, firstKey, lastKey);
    }
}
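A usage sketch for readSummary, following the call pattern in testReadFirstLastPartitionKey above. summaryPath is a hypothetical java.nio.file.Path pointing at a Summary.db file, and 128/2048 are the index intervals the tests use rather than values read from the table schema:

// Read the first/last partition keys of an SSTable from its Summary.db file.
try (final InputStream in = new BufferedInputStream(Files.newInputStream(summaryPath))) {
    final SummaryDbUtils.Summary summary = SummaryDbUtils.readSummary(in, Murmur3Partitioner.instance, 128, 2048);
    if (summary != null) {
        final DecoratedKey first = summary.first(); // first partition key in the SSTable
        final DecoratedKey last = summary.last();   // last partition key in the SSTable
        final BigInteger firstToken = FourZeroUtils.tokenToBigInteger(first.getToken());
        final BigInteger lastToken = FourZeroUtils.tokenToBigInteger(last.getToken());
    }
}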
Use of org.apache.cassandra.spark.shaded.fourzero.cassandra.db.DecoratedKey in project spark-cassandra-bulkreader by jberragan.
Class SSTableCacheTests, method testCache.
@Test
public void testCache() {
    runTest((partitioner, dir, bridge) -> {
        // write an SSTable
        final TestSchema schema = TestSchema.basic(bridge);
        TestUtils.writeSSTable(bridge, dir, partitioner, schema, (writer) -> IntStream.range(0, 10).forEach(i -> writer.write(i, 0, i)));
        TestUtils.writeSSTable(bridge, dir, partitioner, schema, (writer) -> IntStream.range(20, 100).forEach(i -> writer.write(i, 1, i)));
        final List<Path> dataFiles = getFileType(dir, DataLayer.FileType.DATA).collect(Collectors.toList());
        final Path dataFile0 = dataFiles.get(0);
        final Path dataFile1 = dataFiles.get(1);
        final TestDataLayer dataLayer = new TestDataLayer(bridge, dataFiles);
        final List<DataLayer.SSTable> sstables = dataLayer.listSSTables().collect(Collectors.toList());
        final TableMetadata metaData = new FourZeroSchemaBuilder(schema.createStmt, schema.keyspace, new ReplicationFactor(ReplicationFactor.ReplicationStrategy.SimpleStrategy, ImmutableMap.of("replication_factor", 1)), partitioner).tableMetaData();
        final DataLayer.SSTable ssTable0 = sstables.get(0);
        assertFalse(SSTableCache.INSTANCE.containsSummary(ssTable0));
        assertFalse(SSTableCache.INSTANCE.containsIndex(ssTable0));
        assertFalse(SSTableCache.INSTANCE.containsStats(ssTable0));
        final SummaryDbUtils.Summary key1 = SSTableCache.INSTANCE.keysFromSummary(metaData, ssTable0);
        assertNotNull(key1);
        assertTrue(SSTableCache.INSTANCE.containsSummary(ssTable0));
        assertFalse(SSTableCache.INSTANCE.containsIndex(ssTable0));
        assertFalse(SSTableCache.INSTANCE.containsStats(ssTable0));
        assertFalse(SSTableCache.INSTANCE.containsFilter(ssTable0));
        final Pair<DecoratedKey, DecoratedKey> key2 = SSTableCache.INSTANCE.keysFromIndex(metaData, ssTable0);
        assertEquals(key1.first(), key2.getLeft());
        assertEquals(key1.last(), key2.getRight());
        assertTrue(SSTableCache.INSTANCE.containsSummary(ssTable0));
        assertTrue(SSTableCache.INSTANCE.containsIndex(ssTable0));
        assertFalse(SSTableCache.INSTANCE.containsStats(ssTable0));
        assertFalse(SSTableCache.INSTANCE.containsFilter(ssTable0));
        final Descriptor descriptor0 = Descriptor.fromFilename(new File(String.format("./%s/%s", schema.keyspace, schema.table), dataFile0.getFileName().toString()));
        final Map<MetadataType, MetadataComponent> componentMap = SSTableCache.INSTANCE.componentMapFromStats(ssTable0, descriptor0);
        assertNotNull(componentMap);
        assertTrue(SSTableCache.INSTANCE.containsSummary(ssTable0));
        assertTrue(SSTableCache.INSTANCE.containsIndex(ssTable0));
        assertTrue(SSTableCache.INSTANCE.containsStats(ssTable0));
        assertFalse(SSTableCache.INSTANCE.containsFilter(ssTable0));
        assertEquals(componentMap, SSTableCache.INSTANCE.componentMapFromStats(ssTable0, descriptor0));
        final BloomFilter filter = SSTableCache.INSTANCE.bloomFilter(ssTable0, descriptor0);
        assertTrue(SSTableCache.INSTANCE.containsSummary(ssTable0));
        assertTrue(SSTableCache.INSTANCE.containsIndex(ssTable0));
        assertTrue(SSTableCache.INSTANCE.containsStats(ssTable0));
        assertTrue(SSTableCache.INSTANCE.containsFilter(ssTable0));
        assertTrue(filter.isPresent(key1.first()));
        assertTrue(filter.isPresent(key1.last()));
        final DataLayer.SSTable ssTable1 = sstables.get(1);
        final Descriptor descriptor1 = Descriptor.fromFilename(new File(String.format("./%s/%s", schema.keyspace, schema.table), dataFile1.getFileName().toString()));
        assertFalse(SSTableCache.INSTANCE.containsSummary(ssTable1));
        assertFalse(SSTableCache.INSTANCE.containsIndex(ssTable1));
        assertFalse(SSTableCache.INSTANCE.containsStats(ssTable1));
        assertFalse(SSTableCache.INSTANCE.containsFilter(ssTable1));
        final SummaryDbUtils.Summary key3 = SSTableCache.INSTANCE.keysFromSummary(metaData, ssTable1);
        assertNotEquals(key1.first(), key3.first());
        assertNotEquals(key1.last(), key3.last());
        final Pair<DecoratedKey, DecoratedKey> key4 = SSTableCache.INSTANCE.keysFromIndex(metaData, ssTable1);
        assertNotEquals(key1.first(), key4.getLeft());
        assertNotEquals(key1.last(), key4.getRight());
        assertEquals(SSTableCache.INSTANCE.keysFromSummary(metaData, ssTable1).first(), SSTableCache.INSTANCE.keysFromIndex(metaData, ssTable1).getLeft());
        assertEquals(SSTableCache.INSTANCE.keysFromSummary(metaData, ssTable1).last(), SSTableCache.INSTANCE.keysFromIndex(metaData, ssTable1).getRight());
        assertNotEquals(componentMap, SSTableCache.INSTANCE.componentMapFromStats(ssTable1, descriptor1));
        final Pair<DecoratedKey, DecoratedKey> key5 = SSTableCache.INSTANCE.keysFromIndex(metaData, ssTable1);
        assertTrue(SSTableCache.INSTANCE.bloomFilter(ssTable1, descriptor1).isPresent(key5.getLeft()));
        assertTrue(SSTableCache.INSTANCE.containsSummary(ssTable1));
        assertTrue(SSTableCache.INSTANCE.containsIndex(ssTable1));
        assertTrue(SSTableCache.INSTANCE.containsStats(ssTable1));
        assertTrue(SSTableCache.INSTANCE.containsFilter(ssTable1));
    });
}
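Condensed, the sequence exercised above is also the cache warm-up pattern: each accessor loads its component on first use and caches it, after which the corresponding containsX check returns true. A sketch for a single SSTable, assuming ssTable, metaData and descriptor are set up as in the test:

// Warm up SSTableCache for one SSTable; each call populates its backing cache on first use.
final SummaryDbUtils.Summary summary = SSTableCache.INSTANCE.keysFromSummary(metaData, ssTable);           // first/last keys from Summary.db
final Pair<DecoratedKey, DecoratedKey> indexKeys = SSTableCache.INSTANCE.keysFromIndex(metaData, ssTable);  // first/last keys from Index.db
final Map<MetadataType, MetadataComponent> stats = SSTableCache.INSTANCE.componentMapFromStats(ssTable, descriptor); // statistics components
final BloomFilter filter = SSTableCache.INSTANCE.bloomFilter(ssTable, descriptor);                          // bloom filter
assertTrue(SSTableCache.INSTANCE.containsSummary(ssTable));
assertTrue(SSTableCache.INSTANCE.containsIndex(ssTable));
assertTrue(SSTableCache.INSTANCE.containsStats(ssTable));
assertTrue(SSTableCache.INSTANCE.containsFilter(ssTable));
assertTrue(filter.isPresent(summary.first())); // the SSTable's first partition key must be present in its bloom filter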
Aggregations