Use of org.apache.cassandra.spark.TestSchema in project spark-cassandra-bulkreader by jberragan.
In class IndexDbTests, method testSearchIndex:
@Test
public void testSearchIndex() {
    runTest((partitioner, dir, bridge) -> {
        final TestSchema schema = TestSchema.basicBuilder(bridge).withCompression(false).build();
        final IPartitioner iPartitioner = FourZero.getPartitioner(partitioner);
        final int numRows = 5000;

        // write an SSTable and record the token for every row
        final List<BigInteger> tokens = new ArrayList<>(numRows);
        TestUtils.writeSSTable(bridge, dir, partitioner, schema, (writer) -> {
            for (int i = 0; i < numRows; i++) {
                final ByteBuffer key = (ByteBuffer) ByteBuffer.allocate(4).putInt(i).flip();
                final BigInteger token = FourZeroUtils.tokenToBigInteger(iPartitioner.decorateKey(key).getToken());
                tokens.add(token);
                writer.write(i, 0, i);
            }
        });
        assertEquals(1, countSSTables(dir));
        Collections.sort(tokens);

        final TableMetadata metadata = Schema.instance.getTableMetadata(schema.keyspace, schema.table);
        if (metadata == null) {
            throw new NullPointerException("Could not find table");
        }

        final Path summaryDb = TestUtils.getFirstFileType(dir, DataLayer.FileType.SUMMARY);
        assertNotNull(summaryDb);
        final LocalDataLayer dataLayer = new LocalDataLayer(CassandraBridge.CassandraVersion.FOURZERO, partitioner, schema.keyspace, schema.createStmt, false, Collections.emptySet(), true, null, dir.toString());
        final DataLayer.SSTable ssTable = dataLayer.listSSTables().findFirst().orElseThrow(() -> new RuntimeException("Could not find sstable"));

        final int rowSize = 39;
        final int sample = 4;
        // Sample the token list and, for each sampled token, read the offset from Index.db and verify it matches the expected offset.
        // We sample the list because IndexDbUtils.findStartOffset(...) returns the previous offset, so we want to verify
        // that it correctly skips over tokens less than the token we are looking for before returning.
        final List<BigInteger> sparseList = IntStream.range(0, tokens.size()).filter(i -> i > 0 && i % sample == 0).mapToObj(tokens::get).collect(Collectors.toList());
        // every 4th token, excluding index 0: (5000 / 4) - 1 = 1249 sampled tokens
        assertEquals((numRows / 4) - 1, sparseList.size());
        try (final DataInputStream in = new DataInputStream(Objects.requireNonNull(ssTable.openPrimaryIndexStream()))) {
            try {
                for (int idx = 0; idx < sparseList.size(); idx++) {
                    final BigInteger token = sparseList.get(idx);
                    final long expectedOffset = (((idx + 1L) * sample) - 1) * rowSize;
                    final long offset = IndexDbUtils.findStartOffset(in, iPartitioner, Range.closed(token, token), Stats.DoNothingStats.INSTANCE);
                    assertEquals(expectedOffset, offset);
                    FourZeroUtils.skipRowIndexEntry(in);
                }
            } catch (final EOFException ignore) {
                // reached the end of Index.db
            }
        }
    });
}
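The expected-offset formula above can be checked by hand. The short, self-contained sketch below reproduces the arithmetic for the first two sampled tokens; the 39-byte Index.db entry size and the sampling interval of 4 are simply the constants the test assumes, and the class name is only illustrative.

public class IndexOffsetArithmetic {
    public static void main(String[] args) {
        final int rowSize = 39; // size in bytes of each Index.db entry, as assumed by the test
        final int sample = 4;   // every 4th token is sampled

        // idx = 0 samples the 5th token (sorted index 4); findStartOffset returns the
        // offset of the previous entry (index 3), i.e. 3 * 39 = 117
        System.out.println((((0 + 1L) * sample) - 1) * rowSize); // 117

        // idx = 1 samples the 9th token (sorted index 8); previous entry is index 7 -> 7 * 39 = 273
        System.out.println((((1 + 1L) * sample) - 1) * rowSize); // 273
    }
}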
Use of org.apache.cassandra.spark.TestSchema in project spark-cassandra-bulkreader by jberragan.
In class SSTableReaderTests, method testSkipNoPartitions:
@Test
public void testSkipNoPartitions() {
    runTest((partitioner, dir, bridge) -> {
        // write an SSTable
        final TestSchema schema = TestSchema.basic(bridge);
        TestUtils.writeSSTable(bridge, dir, partitioner, schema, (writer) -> {
            for (int i = 0; i < NUM_ROWS; i++) {
                for (int j = 0; j < NUM_COLS; j++) {
                    writer.write(i, j, i + j);
                }
            }
        });
        assertEquals(1, countSSTables(dir));

        final Path dataFile = getFirstFileType(dir, DataLayer.FileType.DATA);
        final Path summaryFile = getFirstFileType(dir, DataLayer.FileType.SUMMARY);
        final TableMetadata metaData = schema.schemaBuilder(partitioner).tableMetaData();
        final TestDataLayer dataLayer = new TestDataLayer(bridge, Collections.singletonList(dataFile));
        SummaryDbUtils.Summary summary;
        try (final InputStream in = new BufferedInputStream(Files.newInputStream(summaryFile))) {
            summary = SummaryDbUtils.readSummary(in, metaData.partitioner, metaData.params.minIndexInterval, metaData.params.maxIndexInterval);
        }

        // set the Spark token range equal to the SSTable token range
        final Range<BigInteger> sparkTokenRange = Range.closed(FourZeroUtils.tokenToBigInteger(summary.first().getToken()), FourZeroUtils.tokenToBigInteger(summary.last().getToken()));
        final SparkRangeFilter rangeFilter = SparkRangeFilter.create(sparkTokenRange);
        final AtomicBoolean skipped = new AtomicBoolean(false);
        final Stats stats = new Stats() {
            @Override
            public void skippedPartition(ByteBuffer key, BigInteger token) {
                LOGGER.error("Skipped partition when it should not have been skipped: " + token);
                skipped.set(true);
            }
        };
        final FourZeroSSTableReader reader = openReader(metaData, dataLayer.listSSTables().findFirst().orElseThrow(() -> new RuntimeException("Could not find SSTable")), Collections.singletonList(rangeFilter), true, stats);

        // the Spark token range covers the entire SSTable, so no partitions should be skipped
        assertEquals(NUM_ROWS * NUM_COLS, countAndValidateRows(reader));
        assertFalse(skipped.get());
    });
}
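The test relies on Guava's Range.closed being inclusive at both ends, so a Spark token range built from the Summary's first and last tokens covers every partition in the SSTable. Below is a minimal illustration of that inclusiveness using made-up token values (real values come from Summary.db); the class name is only illustrative.

import com.google.common.collect.Range;
import java.math.BigInteger;

public class TokenRangeInclusiveness {
    public static void main(String[] args) {
        // hypothetical first/last tokens standing in for the values read from Summary.db
        final BigInteger first = BigInteger.valueOf(-42L);
        final BigInteger last = BigInteger.valueOf(1000L);
        final Range<BigInteger> sparkTokenRange = Range.closed(first, last);
        System.out.println(sparkTokenRange.contains(first));                    // true: lower bound is inclusive
        System.out.println(sparkTokenRange.contains(last));                     // true: upper bound is inclusive
        System.out.println(sparkTokenRange.contains(last.add(BigInteger.ONE))); // false: outside the range
    }
}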
Use of org.apache.cassandra.spark.TestSchema in project spark-cassandra-bulkreader by jberragan.
In class SSTableReaderTests, method testPartialFilterMatch:
@Test
public void testPartialFilterMatch() {
    runTest((partitioner, dir, bridge) -> {
        // write an SSTable
        final TestSchema schema = TestSchema.basic(bridge);
        TestUtils.writeSSTable(bridge, dir, partitioner, schema, (writer) -> {
            for (int i = 0; i < NUM_ROWS; i++) {
                for (int j = 0; j < NUM_COLS; j++) {
                    writer.write(i, j, i + j);
                }
            }
        });
        assertEquals(1, countSSTables(dir));

        final Path dataFile = getFirstFileType(dir, DataLayer.FileType.DATA);
        final TableMetadata metaData = new FourZeroSchemaBuilder(schema.createStmt, schema.keyspace, new ReplicationFactor(ReplicationFactor.ReplicationStrategy.SimpleStrategy, ImmutableMap.of("replication_factor", 1)), partitioner).tableMetaData();
        final TestDataLayer dataLayer = new TestDataLayer(bridge, Collections.singletonList(dataFile));

        // build a Spark range filter and two partition key filters
        final ByteBuffer key1 = Int32Type.instance.fromString("0");
        final BigInteger token1 = bridge.hash(partitioner, key1);
        final PartitionKeyFilter keyInSSTable = PartitionKeyFilter.create(key1, token1);
        final SparkRangeFilter rangeFilter = SparkRangeFilter.create(Range.closed(token1, token1));
        final ByteBuffer key2 = Int32Type.instance.fromString("55");
        final BigInteger token2 = bridge.hash(partitioner, key2);
        final PartitionKeyFilter keyNotInSSTable = PartitionKeyFilter.create(key2, token2);
        final List<CustomFilter> filters = Arrays.asList(rangeFilter, keyInSSTable, keyNotInSSTable);

        final AtomicBoolean pass = new AtomicBoolean(true);
        final AtomicInteger skipCount = new AtomicInteger(0);
        final Stats stats = new Stats() {
            @Override
            public void skippedPartition(ByteBuffer key, BigInteger token) {
                LOGGER.info("Skipping partition: " + token);
                skipCount.incrementAndGet();
                // a skip is only valid if no filter wanted the partition kept
                if (filters.stream().anyMatch(filter -> !filter.skipPartition(key, token))) {
                    LOGGER.info("Should not skip partition: " + token);
                    pass.set(false);
                }
            }
        };
        final FourZeroSSTableReader reader = openReader(metaData, dataLayer.listSSTables().findFirst().orElseThrow(() -> new RuntimeException("Could not find SSTable")), filters, false, stats);
        final int rows = countAndValidateRows(reader);

        // should skip all partitions not matching the filters, leaving only the single matching partition
        assertTrue(skipCount.get() > 0);
        assertEquals(NUM_COLS, rows);
        assertEquals((NUM_ROWS - skipCount.get()) * NUM_COLS, rows);
        assertTrue(pass.get());
    });
}
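The overridden skippedPartition callback above flags a failure only when some filter wanted the partition kept, i.e. a skip counts as valid only if every filter agrees. The sketch below mirrors that anyMatch logic with plain java.util.function predicates; the Predicate stand-ins are hypothetical and not the project's CustomFilter API.

import java.util.Arrays;
import java.util.List;
import java.util.function.Predicate;

public class SkipDecisionSketch {
    public static void main(String[] args) {
        // hypothetical filters: each predicate returns true if the token may be skipped
        final Predicate<Integer> keepToken0 = token -> token != 0;   // wants token 0 kept
        final Predicate<Integer> keepToken55 = token -> token != 55; // wants token 55 kept
        final List<Predicate<Integer>> filters = Arrays.asList(keepToken0, keepToken55);

        // skipping token 7 is fine: every filter agrees it can be skipped
        System.out.println(filters.stream().anyMatch(f -> !f.test(7))); // false -> valid skip
        // skipping token 0 would be wrong: one filter wanted it kept
        System.out.println(filters.stream().anyMatch(f -> !f.test(0))); // true -> invalid skip
    }
}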
Use of org.apache.cassandra.spark.TestSchema in project spark-cassandra-bulkreader by jberragan.
In class SSTableReaderTests, method testOpenSSTableReader:
@Test
public void testOpenSSTableReader() {
    runTest((partitioner, dir, bridge) -> {
        // write an SSTable
        final TestSchema schema = TestSchema.basic(bridge);
        TestUtils.writeSSTable(bridge, dir, partitioner, schema, (writer) -> {
            for (int i = 0; i < NUM_ROWS; i++) {
                for (int j = 0; j < NUM_COLS; j++) {
                    writer.write(i, j, i + j);
                }
            }
        });
        assertEquals(1, countSSTables(dir));

        final Path dataFile = getFirstFileType(dir, DataLayer.FileType.DATA);
        final TableMetadata metaData = schema.schemaBuilder(partitioner).tableMetaData();
        final TestDataLayer dataLayer = new TestDataLayer(bridge, Collections.singletonList(dataFile));
        final FourZeroSSTableReader reader = openReader(metaData, dataLayer.listSSTables().findFirst().orElseThrow(() -> new RuntimeException("Could not find SSTable")));
        assertNotNull(reader.firstToken());
        assertNotNull(reader.lastToken());
        assertNotNull(reader.getSSTableMetadata());
        assertFalse(reader.isRepaired());
        assertEquals(NUM_ROWS * NUM_COLS, countAndValidateRows(reader));
    });
}
Use of org.apache.cassandra.spark.TestSchema in project spark-cassandra-bulkreader by jberragan.
In class SSTableReaderTests, method testSSTableRange:
@Test
public void testSSTableRange() {
    runTest((partitioner, dir, bridge) -> {
        // write an SSTable with 10 rows of a single column each
        final TestSchema schema = TestSchema.basic(bridge);
        TestUtils.writeSSTable(bridge, dir, partitioner, schema, (writer) -> {
            for (int i = 0; i < 10; i++) {
                for (int j = 0; j < 1; j++) {
                    writer.write(i, j, i + j);
                }
            }
        });
        assertEquals(1, countSSTables(dir));

        final Path dataFile = getFirstFileType(dir, DataLayer.FileType.DATA);
        final TableMetadata metaData = schema.schemaBuilder(partitioner).tableMetaData();
        final TestDataLayer dataLayer = new TestDataLayer(bridge, Collections.singletonList(dataFile));
        final SparkSSTableReader reader = openReader(metaData, dataLayer.listSSTables().findFirst().orElseThrow(() -> new RuntimeException("Could not find SSTable")));
        assertNotNull(reader.firstToken());
        assertNotNull(reader.lastToken());

        // verify the first and last keys in the primary Index.db file match the reader's first and last tokens
        final Path indexFile = getFirstFileType(dir, DataLayer.FileType.INDEX);
        final Pair<DecoratedKey, DecoratedKey> firstAndLast;
        try (final InputStream is = new BufferedInputStream(new FileInputStream(indexFile.toFile()))) {
            final Pair<ByteBuffer, ByteBuffer> keys = FourZeroUtils.readPrimaryIndex(is, true, Collections.emptyList());
            firstAndLast = Pair.of(FourZero.getPartitioner(partitioner).decorateKey(keys.getLeft()), FourZero.getPartitioner(partitioner).decorateKey(keys.getRight()));
        }
        final BigInteger first = FourZeroUtils.tokenToBigInteger(firstAndLast.getLeft().getToken());
        final BigInteger last = FourZeroUtils.tokenToBigInteger(firstAndLast.getRight().getToken());
        assertEquals(first, reader.firstToken());
        assertEquals(last, reader.lastToken());

        switch (partitioner) {
            case Murmur3Partitioner:
                assertFalse(SparkSSTableReader.overlaps(reader, Range.closed(Partitioner.Murmur3Partitioner.minToken(), Partitioner.Murmur3Partitioner.minToken())));
                assertFalse(SparkSSTableReader.overlaps(reader, Range.closed(Partitioner.Murmur3Partitioner.minToken(), Partitioner.Murmur3Partitioner.minToken())));
                assertFalse(SparkSSTableReader.overlaps(reader, Range.closed(BigInteger.valueOf(-8710962479251732708L), BigInteger.valueOf(-7686143364045646507L))));
                assertTrue(SparkSSTableReader.overlaps(reader, Range.closed(BigInteger.valueOf(-7509452495886106294L), BigInteger.valueOf(-7509452495886106293L))));
                assertTrue(SparkSSTableReader.overlaps(reader, Range.closed(BigInteger.valueOf(-7509452495886106293L), BigInteger.valueOf(-7509452495886106293L))));
                assertTrue(SparkSSTableReader.overlaps(reader, Range.closed(BigInteger.valueOf(-7509452495886106293L), BigInteger.valueOf(2562047788015215502L))));
                assertTrue(SparkSSTableReader.overlaps(reader, Range.closed(BigInteger.valueOf(-7509452495886106293L), BigInteger.valueOf(9010454139840013625L))));
                assertTrue(SparkSSTableReader.overlaps(reader, Range.closed(BigInteger.valueOf(9010454139840013625L), BigInteger.valueOf(9010454139840013625L))));
                assertFalse(SparkSSTableReader.overlaps(reader, Range.closed(Partitioner.Murmur3Partitioner.maxToken(), Partitioner.Murmur3Partitioner.maxToken())));
                return;
            case RandomPartitioner:
                assertFalse(SparkSSTableReader.overlaps(reader, Range.closed(Partitioner.RandomPartitioner.minToken(), Partitioner.RandomPartitioner.minToken())));
                assertFalse(SparkSSTableReader.overlaps(reader, Range.closed(BigInteger.valueOf(0L), BigInteger.valueOf(500L))));
                assertFalse(SparkSSTableReader.overlaps(reader, Range.closed(new BigInteger("18837662806270881894834867523173387677"), new BigInteger("18837662806270881894834867523173387677"))));
                assertTrue(SparkSSTableReader.overlaps(reader, Range.closed(new BigInteger("18837662806270881894834867523173387678"), new BigInteger("18837662806270881894834867523173387678"))));
                assertTrue(SparkSSTableReader.overlaps(reader, Range.closed(new BigInteger("18837662806270881894834867523173387679"), new BigInteger("18837662806270881894834867523173387679"))));
                assertTrue(SparkSSTableReader.overlaps(reader, Range.closed(new BigInteger("18837662806270881894834867523173387679"), new BigInteger("137731376325982006772573399291321493164"))));
                assertTrue(SparkSSTableReader.overlaps(reader, Range.closed(new BigInteger("137731376325982006772573399291321493164"), new BigInteger("137731376325982006772573399291321493164"))));
                assertFalse(SparkSSTableReader.overlaps(reader, Range.closed(new BigInteger("137731376325982006772573399291321493165"), new BigInteger("137731376325982006772573399291321493165"))));
                assertFalse(SparkSSTableReader.overlaps(reader, Range.closed(Partitioner.RandomPartitioner.maxToken(), Partitioner.RandomPartitioner.maxToken())));
                return;
            default:
                throw new RuntimeException("Unexpected partitioner: " + partitioner);
        }
    });
}
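The overlaps assertions boil down to a closed-interval intersection test between the reader's [firstToken, lastToken] span and the query range. The sketch below illustrates that idea with Guava ranges and made-up tokens; it is not the project's SparkSSTableReader.overlaps implementation, just the interval logic the assertions exercise.

import com.google.common.collect.Range;
import java.math.BigInteger;

public class TokenOverlapSketch {
    public static void main(String[] args) {
        // hypothetical SSTable token span standing in for [reader.firstToken(), reader.lastToken()]
        final Range<BigInteger> sstableRange = Range.closed(BigInteger.valueOf(-100L), BigInteger.valueOf(100L));

        // a single-token range on the lower bound overlaps, because closed ranges include their endpoints
        System.out.println(sstableRange.isConnected(Range.closed(BigInteger.valueOf(-100L), BigInteger.valueOf(-100L)))); // true
        // a single-token range just below the lower bound does not overlap
        System.out.println(sstableRange.isConnected(Range.closed(BigInteger.valueOf(-101L), BigInteger.valueOf(-101L)))); // false
        // a range straddling the upper bound overlaps
        System.out.println(sstableRange.isConnected(Range.closed(BigInteger.valueOf(50L), BigInteger.valueOf(200L)))); // true
    }
}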