Usage of org.apache.cassandra.spark.shaded.fourzero.cassandra.dht.IPartitioner in the project spark-cassandra-bulkreader by jberragan: class IndexDbTests, method testReadToken.
@Test
public void testReadToken() {
    // Property test: for 500 random (partitioner, int) pairs, reading a single mocked
    // Index.db entry must surface the original key bytes and its computed token
    // through the Stats callback.
    qt().withExamples(500).forAll(TestUtils.partitioners(), integers().all()).checkAssert((partitioner, value) -> {
        final IPartitioner iPartitioner = FourZero.getPartitioner(partitioner);
        final BigInteger expectedToken = token(iPartitioner, value);
        // Mock stream contains one (key=value, offset=0) index entry.
        try (final DataInputStream in = mockDataInputStream(value, 0)) {
            IndexDbUtils.readNextToken(iPartitioner, in, new Stats() {
                @Override // NOTE(review): assumes Stats declares readPartitionIndexDb — confirm against Stats
                public void readPartitionIndexDb(ByteBuffer key, BigInteger token) {
                    assertEquals(value.intValue(), key.getInt());
                    assertEquals(expectedToken, token);
                }
            });
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
    });
}
Usage of org.apache.cassandra.spark.shaded.fourzero.cassandra.dht.IPartitioner in the project spark-cassandra-bulkreader by jberragan: class IndexDbTests, method testFindStartEndOffset.
@Test
public void testFindStartEndOffset() {
    // For every supported partitioner, verify findStartOffset lands on the entry
    // immediately before the first row whose token falls inside the query range.
    qt().forAll(TestUtils.partitioners()).checkAssert((partitioner) -> {
        final IPartitioner iPartitioner = FourZero.getPartitioner(partitioner);
        final int rowSize = 256;
        final int numValues = 5000;
        // Build random index rows, ordered by token.
        final IndexRow[] rows = IntStream.range(0, numValues)
                                         .mapToObj(i -> new IndexRow(iPartitioner, RandomUtils.randomPositiveInt(100000)))
                                         .sorted()
                                         .toArray(IndexRow[]::new);
        // Give every row a fixed-width position in the mocked file.
        for (int i = 0; i < rows.length; i++) {
            rows[i].position = i * rowSize;
        }
        final int midPoint = rows.length >> 1;
        final IndexRow target = rows[midPoint];
        // Flatten rows into the (value, offset) int pairs the mock stream expects.
        final int[] valuesAndOffsets = Arrays.stream(rows)
                                             .map(row -> new int[] { row.value, row.position })
                                             .flatMapToInt(Arrays::stream)
                                             .toArray();
        try (final DataInputStream in = mockDataInputStream(valuesAndOffsets)) {
            final long startOffset = IndexDbUtils.findStartOffset(in, iPartitioner, Range.closed(target.token, target.token), Stats.DoNothingStats.INSTANCE);
            // findStartOffset returns the offset of the entry before the match.
            assertEquals(rows[midPoint - 1].position, startOffset);
            FourZeroUtils.skipRowIndexEntry(in);
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
    });
}
Usage of org.apache.cassandra.spark.shaded.fourzero.cassandra.dht.IPartitioner in the project spark-cassandra-bulkreader by jberragan: class IndexDbTests, method testSearchIndex.
@Test
public void testSearchIndex() {
    // End-to-end: write a real sstable, then verify findStartOffset locates the
    // correct Index.db byte offset for a sampled subset of its tokens.
    runTest((partitioner, dir, bridge) -> {
        final TestSchema schema = TestSchema.basicBuilder(bridge).withCompression(false).build();
        final IPartitioner iPartitioner = FourZero.getPartitioner(partitioner);
        final int numRows = 5000;
        // write an sstable and record the token for every partition key
        final List<BigInteger> tokens = new ArrayList<>(numRows);
        TestUtils.writeSSTable(bridge, dir, partitioner, schema, (writer) -> {
            for (int i = 0; i < numRows; i++) {
                final ByteBuffer key = (ByteBuffer) ByteBuffer.allocate(4).putInt(i).flip();
                final BigInteger token = FourZeroUtils.tokenToBigInteger(iPartitioner.decorateKey(key).getToken());
                tokens.add(token);
                writer.write(i, 0, i);
            }
        });
        assertEquals(1, countSSTables(dir));
        Collections.sort(tokens);
        final TableMetadata metadata = Schema.instance.getTableMetadata(schema.keyspace, schema.table);
        if (metadata == null) {
            throw new NullPointerException("Could not find table");
        }
        final Path summaryDb = TestUtils.getFirstFileType(dir, DataLayer.FileType.SUMMARY);
        assertNotNull(summaryDb);
        final LocalDataLayer dataLayer = new LocalDataLayer(CassandraBridge.CassandraVersion.FOURZERO, partitioner, schema.keyspace, schema.createStmt, false, Collections.emptySet(), true, null, dir.toString());
        final DataLayer.SSTable ssTable = dataLayer.listSSTables().findFirst().orElseThrow(() -> new RuntimeException("Could not find sstable"));
        final int rowSize = 39;   // fixed serialized size of one Index.db entry for this schema
        final int sample = 4;     // probe every 4th token
        // sample the token list and read offset in Index.db for sampled list & verify the offset matches the expected
        // we sample the list as IndexDbUtils.findStartOffset(...) returns the previous offset, so we want to test
        // it correctly skips tokens less than the token we are looking for before returning.
        final List<BigInteger> sparseList = IntStream.range(0, tokens.size()).filter(i -> i > 0 && i % sample == 0).mapToObj(tokens::get).collect(Collectors.toList());
        // Was hard-coded as numRows / 4; derive from `sample` so the two stay in sync.
        assertEquals((numRows / sample) - 1, sparseList.size());
        try (final DataInputStream in = new DataInputStream(Objects.requireNonNull(ssTable.openPrimaryIndexStream()))) {
            try {
                for (int idx = 0; idx < sparseList.size(); idx++) {
                    final BigInteger token = sparseList.get(idx);
                    // Offset of the entry preceding the (idx+1)*sample-th row.
                    final long expectedOffset = (((idx + 1L) * sample) - 1) * rowSize;
                    final long offset = IndexDbUtils.findStartOffset(in, iPartitioner, Range.closed(token, token), Stats.DoNothingStats.INSTANCE);
                    assertEquals(expectedOffset, offset);
                    FourZeroUtils.skipRowIndexEntry(in);
                }
            } catch (final EOFException ignore) {
                // Expected when the final probe exhausts the index stream.
            }
        }
    });
}
Usage of org.apache.cassandra.spark.shaded.fourzero.cassandra.dht.IPartitioner in the project spark-cassandra-bulkreader by jberragan: class FourZeroUtils, method filterKeyInBloomFilter.
/**
 * Retains only the filters whose partition key may be present in the sstable's bloom filter.
 *
 * @param ssTable     sstable whose Filter.db backs the bloom filter
 * @param partitioner partitioner used to decorate each filter's key
 * @param descriptor  sstable descriptor used as the cache key
 * @param filters     candidate filters to test
 * @return the subset of {@code filters} that match; the full input list if Filter.db is missing
 * @throws IOException if reading the bloom filter fails for any reason other than a missing file
 */
static List<CustomFilter> filterKeyInBloomFilter(@NotNull final DataLayer.SSTable ssTable, @NotNull final IPartitioner partitioner, final Descriptor descriptor, @NotNull final List<CustomFilter> filters) throws IOException {
    try {
        final BloomFilter bloomFilter = SSTableCache.INSTANCE.bloomFilter(ssTable, descriptor);
        final Function<CustomFilter, Boolean> canApplyMatch = CustomFilter::canFilterByKey;
        final Function<PartitionKeyFilter, Boolean> isKeyPresent = filter -> bloomFilter.isPresent(partitioner.decorateKey(filter.key()));
        return filters.stream().filter(filter -> filter.matchFound(canApplyMatch, isKeyPresent)).collect(Collectors.toList());
    } catch (final FileNotFoundException e) {
        // Was `catch (Exception)` + instanceof check; narrowed to the one exception handled.
        // No Filter.db means we cannot consult the bloom filter — keep every filter.
        return filters;
    }
}
Usage of org.apache.cassandra.spark.shaded.fourzero.cassandra.dht.IPartitioner in the project spark-cassandra-bulkreader by jberragan: class FourZeroUtils, method keysFromIndex.
/**
 * Reads the first and last partition keys from the sstable's primary index (Index.db)
 * and returns them decorated with the table's partitioner.
 * Returns a {@code Pair.of(null, null)} when no primary index stream is available.
 */
static Pair<DecoratedKey, DecoratedKey> keysFromIndex(@NotNull final TableMetadata metadata, @NotNull final DataLayer.SSTable ssTable) throws IOException {
    try (final InputStream primaryIndex = ssTable.openPrimaryIndexStream()) {
        if (primaryIndex == null) {
            // No Index.db component for this sstable.
            return Pair.of(null, null);
        }
        final IPartitioner partitioner = metadata.partitioner;
        final Pair<ByteBuffer, ByteBuffer> firstAndLast = FourZeroUtils.readPrimaryIndex(primaryIndex, true, Collections.emptyList());
        return Pair.of(partitioner.decorateKey(firstAndLast.getLeft()), partitioner.decorateKey(firstAndLast.getRight()));
    }
}
Aggregations