Use of org.apache.cassandra.spark.reader.Rid in project spark-cassandra-bulkreader by jberragan.
The SSTableReaderTests class, testIncrementalRepair method.
// incremental repair
@Test
public void testIncrementalRepair() {
    runTest((partitioner, dir, bridge) -> {
        final TestSchema schema = TestSchema.basic(bridge);
        final int numSSTables = 4;
        final int numRepaired = 2;
        final int numUnRepaired = numSSTables - numRepaired;

        // write some SSTables
        for (int a = 0; a < numSSTables; a++) {
            final int pos = a * NUM_ROWS;
            TestUtils.writeSSTable(bridge, dir, partitioner, schema, (writer) -> {
                for (int i = pos; i < pos + NUM_ROWS; i++) {
                    for (int j = 0; j < NUM_COLS; j++) {
                        writer.write(i, j, i + j);
                    }
                }
            });
        }
        assertEquals(numSSTables, countSSTables(dir));

        final TableMetadata metaData = new FourZeroSchemaBuilder(schema.createStmt, schema.keyspace,
                new ReplicationFactor(ReplicationFactor.ReplicationStrategy.SimpleStrategy,
                        ImmutableMap.of("replication_factor", 1)), partitioner).tableMetaData();
        final TestDataLayer dataLayer = new TestDataLayer(bridge, getFileType(dir, DataLayer.FileType.DATA).collect(Collectors.toList()));

        // count the repaired SSTables that get skipped
        final AtomicInteger skipCount = new AtomicInteger(0);
        final Stats stats = new Stats() {
            @Override
            public void skippedRepairedSSTable(DataLayer.SSTable ssTable, long repairedAt) {
                skipCount.incrementAndGet();
            }
        };

        // mark some SSTables as repaired
        final Map<DataLayer.SSTable, Boolean> isRepaired = dataLayer.listSSTables()
                .collect(Collectors.toMap(Function.identity(), a -> false));
        int count = 0;
        for (final DataLayer.SSTable ssTable : isRepaired.keySet()) {
            if (count < numRepaired) {
                isRepaired.put(ssTable, true);
                count++;
            }
        }

        final List<FourZeroSSTableReader> primaryReaders = dataLayer.listSSTables()
                .map(ssTable -> openIncrementalReader(metaData, ssTable, stats, true, isRepaired.get(ssTable)))
                .filter(reader -> !reader.ignore())
                .collect(Collectors.toList());
        final List<FourZeroSSTableReader> nonPrimaryReaders = dataLayer.listSSTables()
                .map(ssTable -> openIncrementalReader(metaData, ssTable, stats, false, isRepaired.get(ssTable)))
                .filter(reader -> !reader.ignore())
                .collect(Collectors.toList());

        // the primary repair replica should read all SSTables
        assertEquals(numSSTables, primaryReaders.size());
        // a non-primary repair replica should only read unrepaired SSTables
        assertEquals(numUnRepaired, nonPrimaryReaders.size());
        for (final FourZeroSSTableReader reader : nonPrimaryReaders) {
            assertFalse(isRepaired.get(reader.sstable()));
        }
        assertEquals(numUnRepaired, skipCount.get());

        final Set<FourZeroSSTableReader> toCompact = Stream.concat(
                primaryReaders.stream().filter(r -> isRepaired.get(r.sstable())),
                nonPrimaryReaders.stream()).collect(Collectors.toSet());
        assertEquals(numSSTables, toCompact.size());

        int rowCount = 0;
        boolean[] found = new boolean[numSSTables * NUM_ROWS];
        try (final CompactionStreamScanner scanner = new CompactionStreamScanner(metaData, partitioner, toCompact)) {
            // iterate through the CompactionStreamScanner and verify we see every partition key we wrote
            final Rid rid = scanner.getRid();
            while (scanner.hasNext()) {
                scanner.next();
                // extract partition key value
                final int a = rid.getPartitionKey().asIntBuffer().get();
                found[a] = true;
                // extract clustering key value and column name from the composite
                final ByteBuffer colBuf = rid.getColumnName();
                final ByteBuffer clusteringKey = ByteBufUtils.readBytesWithShortLength(colBuf);
                colBuf.get(); // skip end-of-component byte
                final String colName = ByteBufUtils.string(ByteBufUtils.readBytesWithShortLength(colBuf));
                colBuf.get(); // skip end-of-component byte
                if (StringUtils.isEmpty(colName)) {
                    continue;
                }
                assertEquals("c", colName);
                final int b = clusteringKey.asIntBuffer().get();
                // extract value column
                final int c = rid.getValue().asIntBuffer().get();
                assertEquals(c, a + b);
                rowCount++;
            }
        }
        assertEquals(numSSTables * NUM_ROWS * NUM_COLS, rowCount);
        for (final boolean b : found) {
            assertTrue(b);
        }
    });
}
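The hand-rolled colBuf parsing above depends on Cassandra's composite encoding, where each component is written as an unsigned-short length, the component bytes, then a single end-of-component byte. The standalone sketch below illustrates that layout with plain java.nio; CompositeDecoder and readWithShortLength are hypothetical helper names that mimic what ByteBufUtils.readBytesWithShortLength is assumed to do, not the project's actual implementation.

import java.nio.ByteBuffer;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.List;

// Minimal sketch of the composite layout the test decodes by hand:
// each component is <unsigned short length><bytes><end-of-component byte>.
public final class CompositeDecoder {
    // Mirrors what ByteBufUtils.readBytesWithShortLength is assumed to do:
    // read an unsigned short length, then slice that many bytes.
    static ByteBuffer readWithShortLength(ByteBuffer buf) {
        final int length = buf.getShort() & 0xFFFF;
        final ByteBuffer slice = buf.slice();
        slice.limit(length);
        buf.position(buf.position() + length);
        return slice;
    }

    // Splits a composite buffer into its components, skipping the
    // end-of-component byte after each one (the colBuf.get() calls above).
    static List<ByteBuffer> components(ByteBuffer composite) {
        final List<ByteBuffer> parts = new ArrayList<>();
        while (composite.hasRemaining()) {
            parts.add(readWithShortLength(composite));
            composite.get(); // end-of-component byte
        }
        return parts;
    }

    public static void main(String[] args) {
        // build a two-component composite: an int clustering key and the column name "c"
        final ByteBuffer key = ByteBuffer.allocate(4).putInt(42);
        key.flip();
        final byte[] name = "c".getBytes(StandardCharsets.UTF_8);
        final ByteBuffer composite = ByteBuffer.allocate(2 + 4 + 1 + 2 + name.length + 1);
        composite.putShort((short) 4).put(key).put((byte) 0);
        composite.putShort((short) name.length).put(name).put((byte) 0);
        composite.flip();

        final List<ByteBuffer> parts = components(composite);
        System.out.println(parts.get(0).getInt());                       // 42
        System.out.println(StandardCharsets.UTF_8.decode(parts.get(1))); // c
    }
}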
Use of org.apache.cassandra.spark.reader.Rid in project spark-cassandra-bulkreader by jberragan.
The SparkRowIteratorTests class, testRowIterator method.
private static void testRowIterator(final CassandraBridge.CassandraVersion version,
                                    final TestSchema schema,
                                    final TestSchema.TestRow[] testRows) throws IOException {
    final CassandraBridge bridge = CassandraBridge.get(version);
    final CqlSchema cqlSchema = schema.buildSchema();
    final int numRows = testRows.length;
    final int numColumns = cqlSchema.fields().size() - cqlSchema.numPartitionKeys() - cqlSchema.numClusteringKeys();
    final List<CqlField> columns = cqlSchema.fields().stream()
            .filter(f -> !f.isPartitionKey())
            .filter(f -> !f.isClusteringColumn())
            .sorted()
            .collect(Collectors.toList());
    final Rid rid = new Rid();
    final AtomicInteger rowPos = new AtomicInteger();
    final AtomicInteger colPos = new AtomicInteger();

    // mock the data layer
    final DataLayer dataLayer = mock(DataLayer.class);
    when(dataLayer.cqlSchema()).thenReturn(cqlSchema);
    when(dataLayer.version()).thenReturn(version);
    when(dataLayer.isInPartition(any(BigInteger.class), any(ByteBuffer.class))).thenReturn(true);
    when(dataLayer.bridge()).thenCallRealMethod();
    when(dataLayer.stats()).thenReturn(Stats.DoNothingStats.INSTANCE);
    when(dataLayer.requestedFeatures()).thenCallRealMethod();

    // mock the scanner: every next() call writes the next cell into the shared Rid
    final IStreamScanner scanner = mock(IStreamScanner.class);
    when(scanner.hasNext()).thenAnswer(invocation -> rowPos.get() < numRows);
    when(scanner.getRid()).thenReturn(rid);
    doAnswer(invocation -> {
        final int col = colPos.getAndIncrement();
        final TestSchema.TestRow testRow = testRows[rowPos.get()];
        // write the next partition key
        if (col == 0) {
            if (cqlSchema.numPartitionKeys() == 1) {
                final CqlField partitionKey = cqlSchema.partitionKeys().get(0);
                rid.setPartitionKeyCopy(partitionKey.serialize(testRow.get(partitionKey.pos())), BigInteger.ONE);
            } else {
                assert cqlSchema.numPartitionKeys() > 1;
                final ByteBuffer[] partitionBuffers = new ByteBuffer[cqlSchema.numPartitionKeys()];
                int pos = 0;
                for (final CqlField partitionKey : cqlSchema.partitionKeys()) {
                    partitionBuffers[pos] = partitionKey.serialize(testRow.get(partitionKey.pos()));
                    pos++;
                }
                rid.setPartitionKeyCopy(ColumnTypes.build(false, partitionBuffers), BigInteger.ONE);
            }
        }
        // write the next clustering keys & column name as a composite
        final CqlField column = columns.get(col);
        final ByteBuffer[] colBuffers = new ByteBuffer[cqlSchema.numClusteringKeys() + 1];
        int pos = 0;
        for (final CqlField clusteringColumn : cqlSchema.clusteringKeys()) {
            colBuffers[pos] = clusteringColumn.serialize(testRow.get(clusteringColumn.pos()));
            pos++;
        }
        colBuffers[pos] = bridge.ascii().serialize(column.name());
        rid.setColumnNameCopy(ColumnTypes.build(false, colBuffers));
        // write the value column
        rid.setValueCopy(column.serialize(testRow.get(column.pos())));
        // move to the next row once every column has been emitted
        if (colPos.get() == numColumns) {
            if (rowPos.getAndIncrement() >= numRows) {
                throw new IllegalStateException("Went too far...");
            }
            // reset column position
            colPos.set(0);
        }
        return null;
    }).when(scanner).next();
    when(dataLayer.openCompactionScanner(anyList(), any())).thenReturn(scanner);

    // drive the SparkRowIterator and verify the values match the expected rows
    final SparkRowIterator it = new SparkRowIterator(dataLayer);
    int rowCount = 0;
    while (it.next()) {
        // skip tombstones
        while (rowCount < testRows.length && testRows[rowCount].isTombstone()) {
            rowCount++;
        }
        if (rowCount >= testRows.length) {
            break;
        }
        final TestSchema.TestRow row = testRows[rowCount];
        assertEquals(row, schema.toTestRow(it.get()));
        rowCount++;
    }
    assertEquals(numRows, rowCount);
    it.close();
}
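The Mockito stubs above capture the pull-based contract between SparkRowIterator and the scanner: getRid() returns a single mutable holder, and each next() call overwrites it in place rather than allocating a new object per cell. Below is a dependency-free sketch of that pattern, assuming only the hasNext()/next()/getRid() shape visible in the test; FakeRid and FakeScanner are illustrative stand-ins, not the project's Rid and IStreamScanner types.

import java.nio.ByteBuffer;
import java.nio.charset.StandardCharsets;

// Sketch of the pull model the Mockito stubs emulate: the consumer grabs
// the Rid once, then every next() mutates that same holder in place.
public final class RidPullModel {
    static final class FakeRid {
        ByteBuffer partitionKey, columnName, value;
    }

    static final class FakeScanner {
        private final FakeRid rid = new FakeRid(); // one holder, reused per cell
        private final int numRows;
        private int row = 0;

        FakeScanner(int numRows) { this.numRows = numRows; }

        FakeRid getRid()  { return rid; }
        boolean hasNext() { return row < numRows; }

        // overwrite the shared holder, as the doAnswer(...) stub above does
        void next() {
            final ByteBuffer key = ByteBuffer.allocate(4).putInt(row);
            key.flip();
            rid.partitionKey = key;
            rid.columnName = StandardCharsets.UTF_8.encode("c");
            rid.value = StandardCharsets.UTF_8.encode("value-" + row);
            row++;
        }
    }

    public static void main(String[] args) {
        final FakeScanner scanner = new FakeScanner(3);
        final FakeRid rid = scanner.getRid(); // fetched once, outside the loop
        while (scanner.hasNext()) {
            scanner.next();
            System.out.println(rid.partitionKey.getInt(0) + " -> "
                    + StandardCharsets.UTF_8.decode(rid.value));
        }
    }
}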
Use of org.apache.cassandra.spark.reader.Rid in project spark-cassandra-bulkreader by jberragan.
The SSTableReaderTests class, testOpenCompactionScanner method.
@Test
public void testOpenCompactionScanner() {
    runTest((partitioner, dir, bridge) -> {
        // write 3 SSTables with overlapping partitions; the first two hold stale values
        final TestSchema schema = TestSchema.basic(bridge);
        TestUtils.writeSSTable(bridge, dir, partitioner, schema, (writer) -> {
            for (int i = 0; i < NUM_ROWS; i++) {
                for (int j = 0; j < NUM_COLS; j++) {
                    writer.write(i, j, -1);
                }
            }
        });
        TestUtils.writeSSTable(bridge, dir, partitioner, schema, (writer) -> {
            for (int i = 0; i < NUM_ROWS; i++) {
                for (int j = 0; j < NUM_COLS; j++) {
                    writer.write(i, j, -2);
                }
            }
        });
        TestUtils.writeSSTable(bridge, dir, partitioner, schema, (writer) -> {
            for (int i = 0; i < NUM_ROWS; i++) {
                for (int j = 0; j < NUM_COLS; j++) {
                    writer.write(i, j, i + j);
                }
            }
        });
        assertEquals(3, countSSTables(dir));

        // open a CompactionStreamScanner over the 3 SSTables
        final TableMetadata metaData = schema.schemaBuilder(partitioner).tableMetaData();
        final TestDataLayer dataLayer = new TestDataLayer(bridge, getFileType(dir, DataLayer.FileType.DATA).collect(Collectors.toList()));
        final Set<FourZeroSSTableReader> toCompact = dataLayer.listSSTables()
                .map(ssTable -> openReader(metaData, ssTable))
                .collect(Collectors.toSet());

        int count = 0;
        try (final CompactionStreamScanner scanner = new CompactionStreamScanner(metaData, partitioner, toCompact)) {
            // iterate through the CompactionStreamScanner, verifying it correctly compacts the data
            final Rid rid = scanner.getRid();
            while (scanner.hasNext()) {
                scanner.next();
                // extract partition key value
                final int a = rid.getPartitionKey().asIntBuffer().get();
                // extract clustering key value and column name from the composite
                final ByteBuffer colBuf = rid.getColumnName();
                final ByteBuffer clusteringKey = ByteBufUtils.readBytesWithShortLength(colBuf);
                colBuf.get(); // skip end-of-component byte
                final String colName = ByteBufUtils.string(ByteBufUtils.readBytesWithShortLength(colBuf));
                colBuf.get(); // skip end-of-component byte
                if (StringUtils.isEmpty(colName)) {
                    continue;
                }
                assertEquals("c", colName);
                final int b = clusteringKey.asIntBuffer().get();
                // extract value column
                final int c = rid.getValue().asIntBuffer().get();
                // verify compaction keeps the last values written (i + j), not the stale -1/-2
                assertEquals(c, a + b);
                count++;
            }
        }
        assertEquals(NUM_ROWS * NUM_COLS, count);
    });
}
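The final assertion relies on last-writer-wins reconciliation: every (partition key, clustering key) cell appears in all three SSTables, and compaction must surface only the newest value (i + j), discarding the earlier -1 and -2. The toy sketch below simulates that merge with plain maps; insertion order stands in for the write timestamps Cassandra actually reconciles by, so it illustrates the expected outcome rather than the real merge algorithm.

import java.util.HashMap;
import java.util.Map;

// Three generations of values for the same (i, j) cells, merged in write
// order so only the newest generation survives, mirroring what the
// CompactionStreamScanner is expected to produce above.
public final class LastWriteWins {
    public static void main(String[] args) {
        final int numRows = 4;
        final int numCols = 3;
        final Map<String, Integer> compacted = new HashMap<>();
        // generation 0: all cells -1; generation 1: all cells -2;
        // generation 2: cell (i, j) holds i + j
        for (int gen = 0; gen < 3; gen++) {
            for (int i = 0; i < numRows; i++) {
                for (int j = 0; j < numCols; j++) {
                    final int value = (gen == 0) ? -1 : (gen == 1) ? -2 : i + j;
                    compacted.put(i + ":" + j, value); // later put wins
                }
            }
        }
        // after the "compaction" only the last-written values remain
        for (int i = 0; i < numRows; i++) {
            for (int j = 0; j < numCols; j++) {
                if (compacted.get(i + ":" + j) != i + j) {
                    throw new AssertionError("stale value survived at " + i + ":" + j);
                }
            }
        }
        System.out.println("all cells hold the last-written values");
    }
}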