Use of org.apache.cassandra.spark.data.ReplicationFactor in the project spark-cassandra-bulkreader by jberragan: class SSTableReaderTests, method testPartialFilterMatch.
@Test
public void testPartialFilterMatch() {
    runTest((partitioner, dir, bridge) -> {
        // Write a single SSTable containing NUM_ROWS partitions, each with NUM_COLS rows.
        final TestSchema schema = TestSchema.basic(bridge);
        TestUtils.writeSSTable(bridge, dir, partitioner, schema, (writer) -> {
            for (int row = 0; row < NUM_ROWS; row++) {
                for (int col = 0; col < NUM_COLS; col++) {
                    writer.write(row, col, row + col);
                }
            }
        });
        assertEquals(1, countSSTables(dir));

        final Path dataFile = getFirstFileType(dir, DataLayer.FileType.DATA);
        final TableMetadata metaData = new FourZeroSchemaBuilder(schema.createStmt, schema.keyspace,
                                                                 new ReplicationFactor(ReplicationFactor.ReplicationStrategy.SimpleStrategy,
                                                                                       ImmutableMap.of("replication_factor", 1)),
                                                                 partitioner).tableMetaData();
        final TestDataLayer dataLayer = new TestDataLayer(bridge, Collections.singletonList(dataFile));

        // One filter targets a partition key that exists in the SSTable ("0") ...
        final ByteBuffer presentKey = Int32Type.instance.fromString("0");
        final BigInteger presentToken = bridge.hash(partitioner, presentKey);
        final PartitionKeyFilter keyInSSTable = PartitionKeyFilter.create(presentKey, presentToken);
        final SparkRangeFilter rangeFilter = SparkRangeFilter.create(Range.closed(presentToken, presentToken));
        // ... and one targets a key outside the written data ("55").
        final ByteBuffer absentKey = Int32Type.instance.fromString("55");
        final BigInteger absentToken = bridge.hash(partitioner, absentKey);
        final PartitionKeyFilter keyNotInSSTable = PartitionKeyFilter.create(absentKey, absentToken);
        final List<CustomFilter> filters = Arrays.asList(rangeFilter, keyInSSTable, keyNotInSSTable);

        final AtomicBoolean pass = new AtomicBoolean(true);
        final AtomicInteger skipCount = new AtomicInteger(0);
        final Stats stats = new Stats() {
            @Override
            public void skippedPartition(ByteBuffer key, BigInteger token) {
                LOGGER.info("Skipping partition: " + token);
                skipCount.incrementAndGet();
                // A partition is only skippable when every filter agrees it should be skipped;
                // if any filter still wants it, the reader skipped it incorrectly.
                final boolean wantedByAnyFilter = filters.stream().anyMatch(filter -> !filter.skipPartition(key, token));
                if (wantedByAnyFilter) {
                    LOGGER.info("Should not skip partition: " + token);
                    pass.set(false);
                }
            }
        };
        final FourZeroSSTableReader reader = openReader(metaData,
                                                        dataLayer.listSSTables()
                                                                 .findFirst()
                                                                 .orElseThrow(() -> new RuntimeException("Could not find SSTable")),
                                                        filters, false, stats);
        final int rows = countAndValidateRows(reader);

        // Only the single matching partition should survive the filters.
        assertTrue(skipCount.get() > 0);
        assertEquals(NUM_COLS, rows);
        // should skip partitions not matching filters
        assertEquals((NUM_ROWS - skipCount.get()) * NUM_COLS, rows);
        assertTrue(pass.get());
    });
}
Use of org.apache.cassandra.spark.data.ReplicationFactor in the project spark-cassandra-bulkreader by jberragan: class SSTableReaderTests, method testIncrementalRepair.
// Incremental repair: verifies that a primary repair replica reads all SSTables,
// while a non-primary replica skips SSTables already marked repaired, and that
// compacting the non-overlapping set still yields every written row exactly once.
@Test
public void testIncrementalRepair() {
runTest((partitioner, dir, bridge) -> {
final TestSchema schema = TestSchema.basic(bridge);
final int numSSTables = 4;
final int numRepaired = 2;
final int numUnRepaired = numSSTables - numRepaired;
// write some SSTables
// Each SSTable holds a disjoint range of NUM_ROWS partition keys (offset by a * NUM_ROWS),
// with NUM_COLS rows per partition whose value column is key + clustering key.
for (int a = 0; a < numSSTables; a++) {
final int pos = a * NUM_ROWS;
TestUtils.writeSSTable(bridge, dir, partitioner, schema, (writer) -> {
for (int i = pos; i < pos + NUM_ROWS; i++) {
for (int j = 0; j < NUM_COLS; j++) {
writer.write(i, j, i + j);
}
}
});
}
assertEquals(numSSTables, countSSTables(dir));
final TableMetadata metaData = new FourZeroSchemaBuilder(schema.createStmt, schema.keyspace, new ReplicationFactor(ReplicationFactor.ReplicationStrategy.SimpleStrategy, ImmutableMap.of("replication_factor", 1)), partitioner).tableMetaData();
final TestDataLayer dataLayer = new TestDataLayer(bridge, getFileType(dir, DataLayer.FileType.DATA).collect(Collectors.toList()));
// Counts how many SSTables the reader skipped because they were marked repaired.
final AtomicInteger skipCount = new AtomicInteger(0);
final Stats stats = new Stats() {
@Override
public void skippedRepairedSSTable(DataLayer.SSTable ssTable, long repairedAt) {
skipCount.incrementAndGet();
}
};
// mark some SSTables as repaired
// Default every SSTable to unrepaired, then flip the first numRepaired entries.
// NOTE(review): keySet() iteration order of the backing map is unspecified, so
// *which* SSTables get marked repaired is arbitrary — the assertions below only
// depend on the counts, not on which specific files were chosen.
final Map<DataLayer.SSTable, Boolean> isRepaired = dataLayer.listSSTables().collect(Collectors.toMap(Function.identity(), a -> false));
int count = 0;
for (final DataLayer.SSTable ssTable : isRepaired.keySet()) {
if (count < numRepaired) {
isRepaired.put(ssTable, true);
count++;
}
}
final List<FourZeroSSTableReader> primaryReaders = dataLayer.listSSTables().map(ssTable -> openIncrementalReader(metaData, ssTable, stats, true, isRepaired.get(ssTable))).filter(reader -> !reader.ignore()).collect(Collectors.toList());
final List<FourZeroSSTableReader> nonPrimaryReaders = dataLayer.listSSTables().map(ssTable -> openIncrementalReader(metaData, ssTable, stats, false, isRepaired.get(ssTable))).filter(reader -> !reader.ignore()).collect(Collectors.toList());
// primary repair replica should read all sstables
assertEquals(numSSTables, primaryReaders.size());
// non-primary repair replica should only read unrepaired sstables
assertEquals(numUnRepaired, nonPrimaryReaders.size());
for (final FourZeroSSTableReader reader : nonPrimaryReaders) {
assertFalse(isRepaired.get(reader.sstable()));
}
assertEquals(numUnRepaired, skipCount.get());
// Combine the repaired SSTables (from the primary) with the unrepaired ones (from the
// non-primary): together they should cover all numSSTables exactly once.
final Set<FourZeroSSTableReader> toCompact = Stream.concat(primaryReaders.stream().filter(r -> isRepaired.get(r.sstable())), nonPrimaryReaders.stream()).collect(Collectors.toSet());
assertEquals(numSSTables, toCompact.size());
int rowCount = 0;
// found[a] is flipped when partition key 'a' is seen during the compaction scan.
boolean[] found = new boolean[numSSTables * NUM_ROWS];
try (final CompactionStreamScanner scanner = new CompactionStreamScanner(metaData, partitioner, toCompact)) {
// iterate through CompactionScanner and verify we have all the partition keys we are looking for
final Rid rid = scanner.getRid();
while (scanner.hasNext()) {
scanner.next();
// Partition key is a single int32 (see writer.write(i, ...) above).
final int a = rid.getPartitionKey().asIntBuffer().get();
found[a] = true;
// extract clustering key value and column name
// The column-name buffer is a composite: short-length-prefixed clustering key,
// then a separator byte, then the short-length-prefixed column name, then
// another byte — presumably end-of-component markers; TODO confirm encoding.
final ByteBuffer colBuf = rid.getColumnName();
final ByteBuffer clusteringKey = ByteBufUtils.readBytesWithShortLength(colBuf);
colBuf.get();
final String colName = ByteBufUtils.string(ByteBufUtils.readBytesWithShortLength(colBuf));
colBuf.get();
// An empty column name is not a data cell for column "c" — skip without counting.
if (StringUtils.isEmpty(colName)) {
continue;
}
assertEquals("c", colName);
final int b = clusteringKey.asIntBuffer().get();
// extract value column
final int c = rid.getValue().asIntBuffer().get();
// The writer stored i + j, so value == partition key + clustering key.
assertEquals(c, a + b);
rowCount++;
}
}
assertEquals(numSSTables * NUM_ROWS * NUM_COLS, rowCount);
for (final boolean b : found) {
assertTrue(b);
}
});
}
Use of org.apache.cassandra.spark.data.ReplicationFactor in the project spark-cassandra-bulkreader by jberragan: class SSTableReaderTests, method testFilterKeyMissingInIndex.
@Test
public void testFilterKeyMissingInIndex() {
    runTest((partitioner, dir, bridge) -> {
        // Populate a single SSTable with NUM_ROWS partitions of NUM_COLS rows each.
        final TestSchema schema = TestSchema.basic(bridge);
        TestUtils.writeSSTable(bridge, dir, partitioner, schema, (writer) -> {
            for (int row = 0; row < NUM_ROWS; row++) {
                for (int col = 0; col < NUM_COLS; col++) {
                    writer.write(row, col, row + col);
                }
            }
        });
        assertEquals(1, countSSTables(dir));

        final Path dataFile = getFirstFileType(dir, DataLayer.FileType.DATA);
        final TableMetadata metaData = new FourZeroSchemaBuilder(schema.createStmt, schema.keyspace,
                                                                 new ReplicationFactor(ReplicationFactor.ReplicationStrategy.SimpleStrategy,
                                                                                       ImmutableMap.of("replication_factor", 1)),
                                                                 partitioner).tableMetaData();
        final TestDataLayer dataLayer = new TestDataLayer(bridge, Collections.singletonList(dataFile));

        // Both filter keys ("51", "90") are absent from the written data, so the reader
        // can only rule them out by looking them up in the partition index.
        final ByteBuffer missingKey1 = Int32Type.instance.fromString("51");
        final PartitionKeyFilter keyNotInSSTable1 = PartitionKeyFilter.create(missingKey1, bridge.hash(partitioner, missingKey1));
        final ByteBuffer missingKey2 = Int32Type.instance.fromString("90");
        final PartitionKeyFilter keyNotInSSTable2 = PartitionKeyFilter.create(missingKey2, bridge.hash(partitioner, missingKey2));
        final List<CustomFilter> filters = Arrays.asList(keyNotInSSTable1, keyNotInSSTable2);

        final AtomicBoolean pass = new AtomicBoolean(true);
        final AtomicInteger skipCount = new AtomicInteger(0);
        final Stats stats = new Stats() {
            @Override
            public void skippedSSTable(List<CustomFilter> sstableFilters, BigInteger firstToken, BigInteger lastToken) {
                // The SSTable must not be excluded by the token-range check alone.
                pass.set(false);
            }

            @Override
            public void missingInIndex() {
                skipCount.incrementAndGet();
                // Both filters should still be in play when the index lookup fails.
                if (filters.size() != 2) {
                    pass.set(false);
                }
            }
        };
        final FourZeroSSTableReader reader = openReader(metaData,
                                                        dataLayer.listSSTables()
                                                                 .findFirst()
                                                                 .orElseThrow(() -> new RuntimeException("Could not find SSTable")),
                                                        filters, true, stats);

        // With no filter key present in the index, the whole SSTable is ignored.
        assertTrue(reader.ignore());
        assertEquals(1, skipCount.get());
        assertTrue(pass.get());
    });
}
Use of org.apache.cassandra.spark.data.ReplicationFactor in the project spark-cassandra-bulkreader by jberragan: class SchemaBuilderTests, method testUdts.
/* user defined types */
@Test
public void testUdts() {
    final ReplicationFactor rf = new ReplicationFactor(ReplicationFactor.ReplicationStrategy.NetworkTopologyStrategy, ImmutableMap.of("DC1", 3, "DC2", 3));
    final String keyspace = "udt_keyspace";
    final String udtName = "udt_name";
    // Register the UDT alongside the table so the builder can resolve the 'info' column.
    final String createTable = "CREATE TABLE " + keyspace + ".udt_test (\n" + " account_id uuid PRIMARY KEY,\n" + " balance bigint,\n" + " info " + udtName + ",\n" + " name text\n" + ");";
    final String createType = "CREATE TYPE " + keyspace + "." + udtName + " (\n" + " birthday timestamp,\n" + " nationality text,\n" + " weight float,\n" + " height int\n" + ");";
    final FourZeroSchemaBuilder builder = new FourZeroSchemaBuilder(createTable, keyspace, rf, Partitioner.Murmur3Partitioner, toSet(createType));
    final CqlSchema schema = builder.build();

    // Exactly one UDT should be parsed, carrying the four declared fields in order.
    assertEquals(1, schema.udts().size());
    final CqlField.CqlUdt udt = schema.udts().stream().findFirst().get();
    assertEquals(udtName, udt.name());
    final List<CqlField> udtFields = udt.fields();
    final List<?> expectedUdtTypes = Arrays.asList(bridge.timestamp(), bridge.text(), bridge.aFloat(), bridge.aInt());
    assertEquals(expectedUdtTypes.size(), udtFields.size());
    for (int i = 0; i < expectedUdtTypes.size(); i++) {
        assertEquals(expectedUdtTypes.get(i), udtFields.get(i).type());
    }

    // Table columns: uuid PK, bigint, the UDT column, then text.
    final List<CqlField> fields = schema.fields();
    assertEquals(bridge.uuid(), fields.get(0).type());
    assertEquals(bridge.bigint(), fields.get(1).type());
    assertEquals(CqlField.CqlType.InternalType.Udt, fields.get(2).type().internalType());
    assertEquals(bridge.text(), fields.get(3).type());

    // The UDT column's nested field types must mirror the standalone UDT definition.
    final CqlField.CqlUdt udtField = (CqlField.CqlUdt) fields.get(2).type();
    for (int i = 0; i < expectedUdtTypes.size(); i++) {
        assertEquals(expectedUdtTypes.get(i), udtField.field(i).type());
    }
}
Use of org.apache.cassandra.spark.data.ReplicationFactor in the project spark-cassandra-bulkreader by jberragan: class SchemaBuilderTests, method testCollections.
@Test
public void testCollections() {
    // A set<text> column should be mapped to the Set internal collection type.
    final String createStmt = "CREATE TABLE backup_test.collection_test (account_id uuid PRIMARY KEY, balance bigint, names set<text>);";
    final ReplicationFactor rf = new ReplicationFactor(ReplicationFactor.ReplicationStrategy.NetworkTopologyStrategy, ImmutableMap.of("DC1", 3, "DC2", 3));
    final CqlSchema schema = new FourZeroSchemaBuilder(createStmt, "backup_test", rf).build();
    // JUnit convention is assertEquals(expected, actual); the original had the
    // arguments reversed, which produces misleading failure messages.
    assertEquals(CqlField.CqlType.InternalType.Set, schema.getField("names").type().internalType());
}
Aggregations