Search in sources :

Example 1 with IPartitioner

Use of org.apache.cassandra.spark.shaded.fourzero.cassandra.dht.IPartitioner in the project spark-cassandra-bulkreader by jberragan.

From the class IndexDbTests, method testReadToken:

/**
 * Property test over every test partitioner and arbitrary int values: reads the next token from
 * a mocked input stream built from the value and checks, inside the {@code Stats} callback, that
 * the reported key and token match the value and the token computed up front.
 */
@Test
public void testReadToken() {
    qt().withExamples(500).forAll(TestUtils.partitioners(), integers().all()).checkAssert((partitioner, value) -> {
        final IPartitioner cassandraPartitioner = FourZero.getPartitioner(partitioner);
        final BigInteger wantedToken = token(cassandraPartitioner, value);
        try (final DataInputStream indexStream = mockDataInputStream(value, 0)) {
            // the assertions live in the stats callback rather than on a return value
            final Stats verifyingStats = new Stats() {

                public void readPartitionIndexDb(ByteBuffer key, BigInteger token) {
                    assertEquals(value.intValue(), key.getInt());
                    assertEquals(wantedToken, token);
                }
            };
            IndexDbUtils.readNextToken(cassandraPartitioner, indexStream, verifyingStats);
        } catch (IOException ioFailure) {
            // the property lambda cannot throw checked exceptions, so surface it unchecked
            throw new RuntimeException(ioFailure);
        }
    });
}
Also used : Stats(org.apache.cassandra.spark.stats.Stats) BigInteger(java.math.BigInteger) IOException(java.io.IOException) DataInputStream(java.io.DataInputStream) ByteBuffer(java.nio.ByteBuffer) IPartitioner(org.apache.cassandra.spark.shaded.fourzero.cassandra.dht.IPartitioner) TestUtils.runTest(org.apache.cassandra.spark.TestUtils.runTest) Test(org.junit.Test)

Example 2 with IPartitioner

Use of org.apache.cassandra.spark.shaded.fourzero.cassandra.dht.IPartitioner in the project spark-cassandra-bulkreader by jberragan.

From the class IndexDbTests, method testFindStartEndOffset:

/**
 * For every test partitioner: builds 5000 sorted index rows from random values, lays them out at
 * fixed 256-byte positions, and checks that {@code IndexDbUtils.findStartOffset}, when given the
 * closed range covering only the middle row's token, returns the position of the row just before it.
 */
@Test
public void testFindStartEndOffset() {
    qt().forAll(TestUtils.partitioners()).checkAssert((partitioner) -> {
        final IPartitioner cassandraPartitioner = FourZero.getPartitioner(partitioner);
        final int bytesPerRow = 256;
        final int rowCount = 5000;
        // generate random index row values and sort by token
        final IndexRow[] sortedRows = new IndexRow[rowCount];
        for (int i = 0; i < rowCount; i++) {
            sortedRows[i] = new IndexRow(cassandraPartitioner, RandomUtils.randomPositiveInt(100000));
        }
        Arrays.sort(sortedRows);
        // assign each row its position offset and flatten the rows into (value, position) pairs
        final int[] valuesAndOffsets = new int[sortedRows.length * 2];
        for (int i = 0; i < sortedRows.length; i++) {
            final IndexRow row = sortedRows[i];
            row.position = i * bytesPerRow;
            valuesAndOffsets[2 * i] = row.value;
            valuesAndOffsets[2 * i + 1] = row.position;
        }
        final int middle = sortedRows.length / 2;
        final IndexRow target = sortedRows[middle];
        final IndexRow predecessor = sortedRows[middle - 1];
        try (final DataInputStream indexStream = mockDataInputStream(valuesAndOffsets)) {
            final long foundOffset = IndexDbUtils.findStartOffset(indexStream, cassandraPartitioner, Range.closed(target.token, target.token), Stats.DoNothingStats.INSTANCE);
            // the expected start offset is the position of the row preceding the target
            assertEquals(predecessor.position, foundOffset);
            FourZeroUtils.skipRowIndexEntry(indexStream);
        } catch (IOException ioFailure) {
            throw new RuntimeException(ioFailure);
        }
    });
}
Also used : IOException(java.io.IOException) DataInputStream(java.io.DataInputStream) IPartitioner(org.apache.cassandra.spark.shaded.fourzero.cassandra.dht.IPartitioner) TestUtils.runTest(org.apache.cassandra.spark.TestUtils.runTest) Test(org.junit.Test)

Example 3 with IPartitioner

Use of org.apache.cassandra.spark.shaded.fourzero.cassandra.dht.IPartitioner in the project spark-cassandra-bulkreader by jberragan.

From the class IndexDbTests, method testSearchIndex:

/**
 * Writes a single uncompressed SSTable of 5000 rows while recording each key's token, then walks
 * the primary index stream with {@code IndexDbUtils.findStartOffset} for every 4th token (in sorted
 * order) and checks each returned offset against the one derived from the fixed row size.
 */
@Test
public void testSearchIndex() {
    runTest((partitioner, dir, bridge) -> {
        final TestSchema schema = TestSchema.basicBuilder(bridge).withCompression(false).build();
        final IPartitioner cassandraPartitioner = FourZero.getPartitioner(partitioner);
        final int rowCount = 5000;
        // write an sstable and record token
        final List<BigInteger> partitionTokens = new ArrayList<>(rowCount);
        TestUtils.writeSSTable(bridge, dir, partitioner, schema, (writer) -> {
            for (int rowId = 0; rowId < rowCount; rowId++) {
                final ByteBuffer keyBytes = ByteBuffer.allocate(4);
                keyBytes.putInt(rowId);
                keyBytes.flip();
                partitionTokens.add(FourZeroUtils.tokenToBigInteger(cassandraPartitioner.decorateKey(keyBytes).getToken()));
                writer.write(rowId, 0, rowId);
            }
        });
        assertEquals(1, countSSTables(dir));
        Collections.sort(partitionTokens);
        // fail fast if the table was never registered with the schema
        Objects.requireNonNull(Schema.instance.getTableMetadata(schema.keyspace, schema.table), "Could not find table");
        assertNotNull(TestUtils.getFirstFileType(dir, DataLayer.FileType.SUMMARY));
        final LocalDataLayer dataLayer = new LocalDataLayer(CassandraBridge.CassandraVersion.FOURZERO, partitioner, schema.keyspace, schema.createStmt, false, Collections.emptySet(), true, null, dir.toString());
        final DataLayer.SSTable ssTable = dataLayer.listSSTables().findFirst().orElseThrow(() -> new RuntimeException("Could not find sstable"));
        final int bytesPerRow = 39;
        final int sampleStep = 4;
        // sample the token list and read offset in Index.db for sampled list & verify the offset matches the expected
        // we sample the list as IndexDbUtils.findStartOffset(...) returns the previous offset, so we want to test
        // it correctly skips tokens less than the token we are looking for before returning.
        final List<BigInteger> sampledTokens = new ArrayList<>();
        for (int i = sampleStep; i < partitionTokens.size(); i += sampleStep) {
            sampledTokens.add(partitionTokens.get(i));
        }
        assertEquals((rowCount / sampleStep) - 1, sampledTokens.size());
        try (final DataInputStream indexStream = new DataInputStream(Objects.requireNonNull(ssTable.openPrimaryIndexStream()))) {
            try {
                int sampleIdx = 0;
                for (final BigInteger token : sampledTokens) {
                    // the sampled token sits at row (sampleIdx + 1) * sampleStep; the expected offset is the row before it
                    final long precedingRow = ((sampleIdx + 1L) * sampleStep) - 1;
                    final long expectedOffset = precedingRow * bytesPerRow;
                    final long actualOffset = IndexDbUtils.findStartOffset(indexStream, cassandraPartitioner, Range.closed(token, token), Stats.DoNothingStats.INSTANCE);
                    assertEquals(expectedOffset, actualOffset);
                    FourZeroUtils.skipRowIndexEntry(indexStream);
                    sampleIdx++;
                }
            } catch (final EOFException ignore) {
                // hitting the end of the index stream simply ends the scan
            }
        }
    });
}
Also used : TableMetadata(org.apache.cassandra.spark.shaded.fourzero.cassandra.schema.TableMetadata) Path(java.nio.file.Path) ArrayList(java.util.ArrayList) TestSchema(org.apache.cassandra.spark.TestSchema) DataInputStream(java.io.DataInputStream) ByteBuffer(java.nio.ByteBuffer) LocalDataLayer(org.apache.cassandra.spark.data.LocalDataLayer) DataLayer(org.apache.cassandra.spark.data.DataLayer) EOFException(java.io.EOFException) BigInteger(java.math.BigInteger) IPartitioner(org.apache.cassandra.spark.shaded.fourzero.cassandra.dht.IPartitioner) TestUtils.runTest(org.apache.cassandra.spark.TestUtils.runTest) Test(org.junit.Test)

Example 4 with IPartitioner

Use of org.apache.cassandra.spark.shaded.fourzero.cassandra.dht.IPartitioner in the project spark-cassandra-bulkreader by jberragan.

From the class FourZeroUtils, method filterKeyInBloomFilter:

/**
 * Narrows the given filters using the SSTable's bloom filter.
 *
 * <p>Each filter is kept only when its {@code matchFound} check passes, where the key-presence
 * test looks up the decorated filter key in the bloom filter.
 *
 * @param ssTable     the SSTable whose bloom filter is consulted
 * @param partitioner used to decorate each filter key before the bloom filter lookup
 * @param descriptor  descriptor passed through to the bloom filter cache
 * @param filters     the candidate filters
 * @return the filters that pass the check, or {@code filters} unchanged when a
 *         {@link FileNotFoundException} is raised
 * @throws IOException for any other I/O failure
 */
static List<CustomFilter> filterKeyInBloomFilter(@NotNull final DataLayer.SSTable ssTable, @NotNull final IPartitioner partitioner, final Descriptor descriptor, @NotNull final List<CustomFilter> filters) throws IOException {
    try {
        final BloomFilter sstableBloomFilter = SSTableCache.INSTANCE.bloomFilter(ssTable, descriptor);
        final Function<CustomFilter, Boolean> filtersByKey = CustomFilter::canFilterByKey;
        final Function<PartitionKeyFilter, Boolean> keyMayExist = keyFilter -> sstableBloomFilter.isPresent(partitioner.decorateKey(keyFilter.key()));
        return filters.stream()
                      .filter(candidate -> candidate.matchFound(filtersByKey, keyMayExist))
                      .collect(Collectors.toList());
    } catch (FileNotFoundException missingFile) {
        // nothing to check against, so leave the filters untouched
        return filters;
    }
}
Also used : Arrays(java.util.Arrays) DataInputBuffer(org.apache.cassandra.spark.shaded.fourzero.cassandra.io.util.DataInputBuffer) Version(org.apache.cassandra.spark.shaded.fourzero.cassandra.io.sstable.format.Version) FBUtilities.updateChecksumInt(org.apache.cassandra.spark.shaded.fourzero.cassandra.utils.FBUtilities.updateChecksumInt) CustomFilter(org.apache.cassandra.spark.sparksql.filters.CustomFilter) TypeParser(org.apache.cassandra.spark.shaded.fourzero.cassandra.db.marshal.TypeParser) ByteBufferAccessor(org.apache.cassandra.spark.shaded.fourzero.cassandra.db.marshal.ByteBufferAccessor) EncodingStats(org.apache.cassandra.spark.shaded.fourzero.cassandra.db.rows.EncodingStats) ByteBuffer(java.nio.ByteBuffer) DecoratedKey(org.apache.cassandra.spark.shaded.fourzero.cassandra.db.DecoratedKey) AbstractType(org.apache.cassandra.spark.shaded.fourzero.cassandra.db.marshal.AbstractType) PartitionKeyFilter(org.apache.cassandra.spark.sparksql.filters.PartitionKeyFilter) Pair(org.apache.commons.lang3.tuple.Pair) ByteBufUtils(org.apache.cassandra.spark.utils.ByteBufUtils) Map(java.util.Map) CorruptSSTableException(org.apache.cassandra.spark.shaded.fourzero.cassandra.io.sstable.CorruptSSTableException) BigInteger(java.math.BigInteger) SerializationHeader(org.apache.cassandra.spark.shaded.fourzero.cassandra.db.SerializationHeader) EnumSet(java.util.EnumSet) IPartitioner(org.apache.cassandra.spark.shaded.fourzero.cassandra.dht.IPartitioner) EnumMap(java.util.EnumMap) MetadataComponent(org.apache.cassandra.spark.shaded.fourzero.cassandra.io.sstable.metadata.MetadataComponent) BloomFilter(org.apache.cassandra.spark.shaded.fourzero.cassandra.utils.BloomFilter) EOFException(java.io.EOFException) Collectors(java.util.stream.Collectors) FileNotFoundException(java.io.FileNotFoundException) VIntCoding(org.apache.cassandra.spark.shaded.fourzero.cassandra.utils.vint.VIntCoding) InvocationTargetException(java.lang.reflect.InvocationTargetException) List(java.util.List) 
ValidationMetadata(org.apache.cassandra.spark.shaded.fourzero.cassandra.io.sstable.metadata.ValidationMetadata) NotNull(org.jetbrains.annotations.NotNull) BloomFilterSerializer(org.apache.cassandra.spark.shaded.fourzero.cassandra.utils.BloomFilterSerializer) Token(org.apache.cassandra.spark.shaded.fourzero.cassandra.dht.Token) DataInputStream(java.io.DataInputStream) RandomPartitioner(org.apache.cassandra.spark.shaded.fourzero.cassandra.dht.RandomPartitioner) Constructor(java.lang.reflect.Constructor) Function(java.util.function.Function) ArrayList(java.util.ArrayList) LinkedHashMap(java.util.LinkedHashMap) Descriptor(org.apache.cassandra.spark.shaded.fourzero.cassandra.io.sstable.Descriptor) UTF8Type(org.apache.cassandra.spark.shaded.fourzero.cassandra.db.marshal.UTF8Type) Murmur3Partitioner(org.apache.cassandra.spark.shaded.fourzero.cassandra.dht.Murmur3Partitioner) ByteBufferUtil(org.apache.cassandra.spark.shaded.fourzero.cassandra.utils.ByteBufferUtil) ClusteringPrefix(org.apache.cassandra.spark.shaded.fourzero.cassandra.db.ClusteringPrefix) TableMetadata(org.apache.cassandra.spark.shaded.fourzero.cassandra.schema.TableMetadata) IOException(java.io.IOException) Component(org.apache.cassandra.spark.shaded.fourzero.cassandra.io.sstable.Component) MetadataType(org.apache.cassandra.spark.shaded.fourzero.cassandra.io.sstable.metadata.MetadataType) DataInputPlus(org.apache.cassandra.spark.shaded.fourzero.cassandra.io.util.DataInputPlus) DataLayer(org.apache.cassandra.spark.data.DataLayer) CRC32(java.util.zip.CRC32) CompositeType(org.apache.cassandra.spark.shaded.fourzero.cassandra.db.marshal.CompositeType) Collections(java.util.Collections) Clustering(org.apache.cassandra.spark.shaded.fourzero.cassandra.db.Clustering) InputStream(java.io.InputStream)

Example 5 with IPartitioner

Use of org.apache.cassandra.spark.shaded.fourzero.cassandra.dht.IPartitioner in the project spark-cassandra-bulkreader by jberragan.

From the class FourZeroUtils, method keysFromIndex:

/**
 * Reads a pair of keys from the SSTable's primary index and decorates both with the table's
 * partitioner.
 *
 * @param metadata table metadata supplying the partitioner
 * @param ssTable  the SSTable whose primary index stream is opened (and closed) here
 * @return the decorated key pair read from the index (presumably the first and last partition
 *         keys; confirm against {@code readPrimaryIndex}), or a pair of nulls when the SSTable
 *         yields no primary index stream
 * @throws IOException if opening, reading or closing the index stream fails
 */
static Pair<DecoratedKey, DecoratedKey> keysFromIndex(@NotNull final TableMetadata metadata, @NotNull final DataLayer.SSTable ssTable) throws IOException {
    try (final InputStream indexStream = ssTable.openPrimaryIndexStream()) {
        if (indexStream == null) {
            // no index stream to read from, so there are no keys to report
            return Pair.of(null, null);
        }
        final IPartitioner tablePartitioner = metadata.partitioner;
        final Pair<ByteBuffer, ByteBuffer> rawKeys = FourZeroUtils.readPrimaryIndex(indexStream, true, Collections.emptyList());
        final DecoratedKey leftKey = tablePartitioner.decorateKey(rawKeys.getLeft());
        final DecoratedKey rightKey = tablePartitioner.decorateKey(rawKeys.getRight());
        return Pair.of(leftKey, rightKey);
    }
}
Also used : DataInputStream(java.io.DataInputStream) InputStream(java.io.InputStream) ByteBuffer(java.nio.ByteBuffer) IPartitioner(org.apache.cassandra.spark.shaded.fourzero.cassandra.dht.IPartitioner)

Aggregations

IPartitioner (org.apache.cassandra.spark.shaded.fourzero.cassandra.dht.IPartitioner)7 ByteBuffer (java.nio.ByteBuffer)6 DataInputStream (java.io.DataInputStream)5 BigInteger (java.math.BigInteger)5 TestUtils.runTest (org.apache.cassandra.spark.TestUtils.runTest)5 Test (org.junit.Test)5 DataLayer (org.apache.cassandra.spark.data.DataLayer)4 IOException (java.io.IOException)3 InputStream (java.io.InputStream)3 Path (java.nio.file.Path)3 ArrayList (java.util.ArrayList)3 TestSchema (org.apache.cassandra.spark.TestSchema)3 TableMetadata (org.apache.cassandra.spark.shaded.fourzero.cassandra.schema.TableMetadata)3 EOFException (java.io.EOFException)2 LocalDataLayer (org.apache.cassandra.spark.data.LocalDataLayer)2 BufferedInputStream (java.io.BufferedInputStream)1 FileInputStream (java.io.FileInputStream)1 FileNotFoundException (java.io.FileNotFoundException)1 Constructor (java.lang.reflect.Constructor)1 InvocationTargetException (java.lang.reflect.InvocationTargetException)1