Search in sources :

Example 1 with CustomFilter

use of org.apache.cassandra.spark.sparksql.filters.CustomFilter in project spark-cassandra-bulkreader by jberragan.

the class SSTableReaderTests method testPartialFilterMatch.

/**
 * Reads a freshly written SSTable through a filter list that contains a token range filter,
 * a partition key filter for key "0" and a partition key filter for key "55", and verifies that
 * exactly NUM_COLS rows come back and that every skipped partition was reported as skippable
 * by all of the supplied filters.
 */
@Test
public void testPartialFilterMatch() {
    runTest((partitioner, dir, bridge) -> {
        // write an SSTable containing NUM_ROWS x NUM_COLS written values
        final TestSchema schema = TestSchema.basic(bridge);
        TestUtils.writeSSTable(bridge, dir, partitioner, schema, (writer) -> {
            for (int row = 0; row < NUM_ROWS; row++) {
                for (int col = 0; col < NUM_COLS; col++) {
                    writer.write(row, col, row + col);
                }
            }
        });
        assertEquals(1, countSSTables(dir));

        final Path dataFile = getFirstFileType(dir, DataLayer.FileType.DATA);
        final ReplicationFactor replication = new ReplicationFactor(ReplicationFactor.ReplicationStrategy.SimpleStrategy, ImmutableMap.of("replication_factor", 1));
        final TableMetadata metaData = new FourZeroSchemaBuilder(schema.createStmt, schema.keyspace, replication, partitioner).tableMetaData();
        final TestDataLayer dataLayer = new TestDataLayer(bridge, Collections.singletonList(dataFile));

        // filter on key "0" plus a token range that covers only that key's token
        final ByteBuffer presentKey = Int32Type.instance.fromString("0");
        final BigInteger presentToken = bridge.hash(partitioner, presentKey);
        final PartitionKeyFilter keyInSSTable = PartitionKeyFilter.create(presentKey, presentToken);
        final SparkRangeFilter rangeFilter = SparkRangeFilter.create(Range.closed(presentToken, presentToken));

        // filter on key "55" (named keyNotInSSTable: presumably outside the written key set - confirm against NUM_ROWS)
        final ByteBuffer absentKey = Int32Type.instance.fromString("55");
        final BigInteger absentToken = bridge.hash(partitioner, absentKey);
        final PartitionKeyFilter keyNotInSSTable = PartitionKeyFilter.create(absentKey, absentToken);

        final List<CustomFilter> filters = Arrays.asList(rangeFilter, keyInSSTable, keyNotInSSTable);
        final AtomicBoolean pass = new AtomicBoolean(true);
        final AtomicInteger skipCount = new AtomicInteger(0);
        final Stats stats = new Stats() {

            @Override
            public void skippedPartition(ByteBuffer key, BigInteger token) {
                LOGGER.info("Skipping partition: " + token);
                skipCount.incrementAndGet();
                // a skip is only legitimate when every filter agrees the partition can be skipped
                final boolean skippableByAll = filters.stream().allMatch(filter -> filter.skipPartition(key, token));
                if (!skippableByAll) {
                    LOGGER.info("Should not skip partition: " + token);
                    pass.set(false);
                }
            }
        };

        final FourZeroSSTableReader reader = openReader(metaData, dataLayer.listSSTables().findFirst().orElseThrow(() -> new RuntimeException("Could not find SSTable")), filters, false, stats);
        final int rows = countAndValidateRows(reader);
        final int skipped = skipCount.get();
        assertTrue(skipped > 0);
        assertEquals(NUM_COLS, rows);
        // should skip partitions not matching filters
        assertEquals((NUM_ROWS - skipCount.get()) * NUM_COLS, rows);
        assertTrue(pass.get());
    });
}
Also used : Path(java.nio.file.Path) TableMetadata(org.apache.cassandra.spark.shaded.fourzero.cassandra.schema.TableMetadata) ReplicationFactor(org.apache.cassandra.spark.data.ReplicationFactor) TestSchema(org.apache.cassandra.spark.TestSchema) ByteBuffer(java.nio.ByteBuffer) PartitionKeyFilter(org.apache.cassandra.spark.sparksql.filters.PartitionKeyFilter) AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) CustomFilter(org.apache.cassandra.spark.sparksql.filters.CustomFilter) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) TestDataLayer(org.apache.cassandra.spark.TestDataLayer) Stats(org.apache.cassandra.spark.stats.Stats) BigInteger(java.math.BigInteger) SparkRangeFilter(org.apache.cassandra.spark.sparksql.filters.SparkRangeFilter) TestUtils.runTest(org.apache.cassandra.spark.TestUtils.runTest) Test(org.junit.Test)

Example 2 with CustomFilter

use of org.apache.cassandra.spark.sparksql.filters.CustomFilter in project spark-cassandra-bulkreader by jberragan.

the class SSTableReaderTests method testExtractRangePartitionKeyFilters.

/**
 * Builds partition key filters for the int keys 0..999 (tokens computed with Murmur3Partitioner),
 * keeps the ones whose token lies inside a fixed Spark token range, and checks that
 * FourZeroSSTableReader.extractRange returns a range lying strictly inside that Spark range.
 */
@Test
public void testExtractRangePartitionKeyFilters() {
    // one filter per 4-byte int key, paired with the token Murmur3Partitioner assigns to it
    final List<PartitionKeyFilter> partitionKeyFilters = new ArrayList<>();
    for (int i = 0; i < 1000; i++) {
        final ByteBuffer key = (ByteBuffer) ByteBuffer.allocate(4).putInt(i).flip();
        final BigInteger token = FourZeroUtils.tokenToBigInteger(Murmur3Partitioner.instance.getToken(key).getToken());
        partitionKeyFilters.add(PartitionKeyFilter.create(key, token));
    }

    final Range<BigInteger> sparkRange = Range.closed(new BigInteger("0"), new BigInteger("2305843009213693952"));

    // the range filter goes first, followed by every key filter whose token the range contains
    final List<CustomFilter> filters = new ArrayList<>(partitionKeyFilters.size() + 1);
    filters.add(SparkRangeFilter.create(sparkRange));
    for (final PartitionKeyFilter keyFilter : partitionKeyFilters) {
        if (sparkRange.contains(keyFilter.token())) {
            filters.add(keyFilter);
        }
    }
    assertTrue(filters.size() > 1);

    final Optional<Range<BigInteger>> range = FourZeroSSTableReader.extractRange(filters);
    assertTrue(range.isPresent());
    final Range<BigInteger> extracted = range.get();
    assertNotEquals(sparkRange, extracted);
    // both endpoints of the extracted range must fall strictly inside the Spark range
    assertTrue(sparkRange.lowerEndpoint().compareTo(extracted.lowerEndpoint()) < 0);
    assertTrue(sparkRange.upperEndpoint().compareTo(extracted.upperEndpoint()) > 0);
}
Also used : Arrays(java.util.Arrays) StringUtils(org.apache.commons.lang.StringUtils) BufferedInputStream(java.io.BufferedInputStream) UnfilteredRowIterator(org.apache.cassandra.spark.shaded.fourzero.cassandra.db.rows.UnfilteredRowIterator) CustomFilter(org.apache.cassandra.spark.sparksql.filters.CustomFilter) LoggerFactory(org.slf4j.LoggerFactory) AbstractRow(org.apache.cassandra.spark.shaded.fourzero.cassandra.db.rows.AbstractRow) SparkRowIterator(org.apache.cassandra.spark.sparksql.SparkRowIterator) ByteBuffer(java.nio.ByteBuffer) DecoratedKey(org.apache.cassandra.spark.shaded.fourzero.cassandra.db.DecoratedKey) PartitionKeyFilter(org.apache.cassandra.spark.sparksql.filters.PartitionKeyFilter) Pair(org.apache.commons.lang3.tuple.Pair) ByteBufUtils(org.apache.cassandra.spark.utils.ByteBufUtils) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) TestUtils(org.apache.cassandra.spark.TestUtils) Map(java.util.Map) NUM_ROWS(org.apache.cassandra.spark.TestUtils.NUM_ROWS) BigInteger(java.math.BigInteger) Path(java.nio.file.Path) SSTablesSupplier(org.apache.cassandra.spark.data.SSTablesSupplier) ImmutableMap(com.google.common.collect.ImmutableMap) Range(com.google.common.collect.Range) Set(java.util.Set) Collectors(java.util.stream.Collectors) List(java.util.List) Stream(java.util.stream.Stream) Assert.assertFalse(org.junit.Assert.assertFalse) TestUtils.getFileType(org.apache.cassandra.spark.TestUtils.getFileType) Optional(java.util.Optional) SparkSSTableReader(org.apache.cassandra.spark.reader.SparkSSTableReader) NotNull(org.jetbrains.annotations.NotNull) Rid(org.apache.cassandra.spark.reader.Rid) Cell(org.apache.cassandra.spark.shaded.fourzero.cassandra.db.rows.Cell) DataInputStream(java.io.DataInputStream) InternalRow(org.apache.spark.sql.catalyst.InternalRow) ReplicationFactor(org.apache.cassandra.spark.data.ReplicationFactor) AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) 
ISSTableScanner(org.apache.cassandra.spark.shaded.fourzero.cassandra.io.sstable.ISSTableScanner) Unfiltered(org.apache.cassandra.spark.shaded.fourzero.cassandra.db.rows.Unfiltered) TestDataLayer(org.apache.cassandra.spark.TestDataLayer) Function(java.util.function.Function) Int32Type(org.apache.cassandra.spark.shaded.fourzero.cassandra.db.marshal.Int32Type) ArrayList(java.util.ArrayList) HashSet(java.util.HashSet) TestUtils.runTest(org.apache.cassandra.spark.TestUtils.runTest) Stats(org.apache.cassandra.spark.stats.Stats) ColumnData(org.apache.cassandra.spark.shaded.fourzero.cassandra.db.rows.ColumnData) Descriptor(org.apache.cassandra.spark.shaded.fourzero.cassandra.io.sstable.Descriptor) BufferDecoratedKey(org.apache.cassandra.spark.shaded.fourzero.cassandra.db.BufferDecoratedKey) Murmur3Partitioner(org.apache.cassandra.spark.shaded.fourzero.cassandra.dht.Murmur3Partitioner) SparkRangeFilter(org.apache.cassandra.spark.sparksql.filters.SparkRangeFilter) TestUtils.countSSTables(org.apache.cassandra.spark.TestUtils.countSSTables) Logger(org.slf4j.Logger) TableMetadata(org.apache.cassandra.spark.shaded.fourzero.cassandra.schema.TableMetadata) Files(java.nio.file.Files) Assert.assertNotNull(org.junit.Assert.assertNotNull) Assert.assertTrue(org.junit.Assert.assertTrue) IOException(java.io.IOException) Test(org.junit.Test) FileInputStream(java.io.FileInputStream) File(java.io.File) Assert.assertNotEquals(org.junit.Assert.assertNotEquals) DataInputPlus(org.apache.cassandra.spark.shaded.fourzero.cassandra.io.util.DataInputPlus) DataLayer(org.apache.cassandra.spark.data.DataLayer) Partitioner(org.apache.cassandra.spark.data.partitioner.Partitioner) Paths(java.nio.file.Paths) NUM_COLS(org.apache.cassandra.spark.TestUtils.NUM_COLS) TestUtils.getFirstFileType(org.apache.cassandra.spark.TestUtils.getFirstFileType) Collections(java.util.Collections) TestSchema(org.apache.cassandra.spark.TestSchema) Assert.assertEquals(org.junit.Assert.assertEquals) 
InputStream(java.io.InputStream) ArrayList(java.util.ArrayList) Range(com.google.common.collect.Range) ByteBuffer(java.nio.ByteBuffer) PartitionKeyFilter(org.apache.cassandra.spark.sparksql.filters.PartitionKeyFilter) CustomFilter(org.apache.cassandra.spark.sparksql.filters.CustomFilter) BigInteger(java.math.BigInteger) TestUtils.runTest(org.apache.cassandra.spark.TestUtils.runTest) Test(org.junit.Test)

Example 3 with CustomFilter

use of org.apache.cassandra.spark.sparksql.filters.CustomFilter in project spark-cassandra-bulkreader by jberragan.

the class SSTableReaderTests method testFilterKeyMissingInIndex.

/**
 * Opens a reader on a freshly written SSTable with two partition key filters (keys "51" and "90")
 * and verifies that the reader reports itself as ignorable, that Stats.missingInIndex fires exactly
 * once, and that Stats.skippedSSTable is never invoked.
 */
@Test
public void testFilterKeyMissingInIndex() {
    runTest((partitioner, dir, bridge) -> {
        // write an SSTable
        final TestSchema schema = TestSchema.basic(bridge);
        TestUtils.writeSSTable(bridge, dir, partitioner, schema, (writer) -> {
            for (int row = 0; row < NUM_ROWS; row++) {
                for (int col = 0; col < NUM_COLS; col++) {
                    writer.write(row, col, row + col);
                }
            }
        });
        assertEquals(1, countSSTables(dir));

        final Path dataFile = getFirstFileType(dir, DataLayer.FileType.DATA);
        final ReplicationFactor replication = new ReplicationFactor(ReplicationFactor.ReplicationStrategy.SimpleStrategy, ImmutableMap.of("replication_factor", 1));
        final TableMetadata metaData = new FourZeroSchemaBuilder(schema.createStmt, schema.keyspace, replication, partitioner).tableMetaData();
        final TestDataLayer dataLayer = new TestDataLayer(bridge, Collections.singletonList(dataFile));

        // two key filters, named keyNotInSSTable* (presumably for keys the SSTable does not hold - confirm against NUM_ROWS)
        final ByteBuffer firstKey = Int32Type.instance.fromString("51");
        final PartitionKeyFilter keyNotInSSTable1 = PartitionKeyFilter.create(firstKey, bridge.hash(partitioner, firstKey));
        final ByteBuffer secondKey = Int32Type.instance.fromString("90");
        final PartitionKeyFilter keyNotInSSTable2 = PartitionKeyFilter.create(secondKey, bridge.hash(partitioner, secondKey));
        final List<CustomFilter> keyFilters = Arrays.asList(keyNotInSSTable1, keyNotInSSTable2);

        final AtomicBoolean pass = new AtomicBoolean(true);
        final AtomicInteger skipCount = new AtomicInteger(0);
        final Stats stats = new Stats() {

            @Override
            public void skippedSSTable(List<CustomFilter> filters, BigInteger firstToken, BigInteger lastToken) {
                // this callback is not expected in this scenario; any invocation fails the test
                pass.set(false);
            }

            @Override
            public void missingInIndex() {
                skipCount.incrementAndGet();
                // sanity check on the filter list captured from the enclosing test
                if (keyFilters.size() != 2) {
                    pass.set(false);
                }
            }
        };

        final FourZeroSSTableReader reader = openReader(metaData, dataLayer.listSSTables().findFirst().orElseThrow(() -> new RuntimeException("Could not find SSTable")), keyFilters, true, stats);
        assertTrue(reader.ignore());
        assertEquals(1, skipCount.get());
        assertTrue(pass.get());
    });
}
Also used : Path(java.nio.file.Path) TableMetadata(org.apache.cassandra.spark.shaded.fourzero.cassandra.schema.TableMetadata) ReplicationFactor(org.apache.cassandra.spark.data.ReplicationFactor) TestSchema(org.apache.cassandra.spark.TestSchema) ByteBuffer(java.nio.ByteBuffer) PartitionKeyFilter(org.apache.cassandra.spark.sparksql.filters.PartitionKeyFilter) AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) CustomFilter(org.apache.cassandra.spark.sparksql.filters.CustomFilter) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) TestDataLayer(org.apache.cassandra.spark.TestDataLayer) Stats(org.apache.cassandra.spark.stats.Stats) BigInteger(java.math.BigInteger) List(java.util.List) ArrayList(java.util.ArrayList) TestUtils.runTest(org.apache.cassandra.spark.TestUtils.runTest) Test(org.junit.Test)

Example 4 with CustomFilter

use of org.apache.cassandra.spark.sparksql.filters.CustomFilter in project spark-cassandra-bulkreader by jberragan.

the class FourZeroUtils method readPrimaryIndex.

/**
 * Scans a primary Index.db stream entry by entry and returns the first and last partition keys seen.
 * <p>
 * Each entry is read as an unsigned-short key length followed by that many key bytes; the rest of
 * the entry is consumed by {@code skipRowIndexEntry}. The scan ends when a read hits end of stream.
 *
 * @param primaryIndex     input stream for the Index.db file; it is closed before this method returns
 * @param readFirstLastKey when {@code true} the filters are never consulted and the whole index is scanned;
 *                         when {@code false} every key is offered to the filters as it is read
 * @param filters          filters consulted per key when {@code readFirstLastKey} is {@code false}; as soon as
 *                         one filter reports {@code canFilterByKey()} and {@code filter(key)} for a key,
 *                         scanning stops
 * @return pair of first and last keys read (both {@code null} if no entry was read), or a pair of
 * {@code null}s when scanning stopped early because a filter accepted a key
 * @throws IOException if reading from or closing the stream fails for a reason other than end of stream
 */
@SuppressWarnings("InfiniteLoopStatement")
static Pair<ByteBuffer, ByteBuffer> readPrimaryIndex(@NotNull final InputStream primaryIndex, final boolean readFirstLastKey, @NotNull final List<CustomFilter> filters) throws IOException {
    ByteBuffer firstKey = null;
    byte[] lastKeyBytes = null;
    try (final DataInputStream in = new DataInputStream(primaryIndex)) {
        try {
            for (; ; ) {
                final byte[] keyBytes = new byte[in.readUnsignedShort()];
                in.readFully(keyBytes);
                if (firstKey == null) {
                    firstKey = ByteBuffer.wrap(keyBytes);
                }
                lastKeyBytes = keyBytes;
                if (!readFirstLastKey) {
                    // the filters get their own buffer view of the key, separate from the ones returned
                    final ByteBuffer key = ByteBuffer.wrap(keyBytes);
                    for (final CustomFilter filter : filters) {
                        if (filter.canFilterByKey() && filter.filter(key)) {
                            return Pair.of(null, null);
                        }
                    }
                }
                // read position & skip promoted index
                skipRowIndexEntry(in);
            }
        } catch (final EOFException ignored) {
            // running off the end of the stream is how the scan loop terminates
        }
    }
    final ByteBuffer lastKey = lastKeyBytes == null ? null : ByteBuffer.wrap(lastKeyBytes);
    return Pair.of(firstKey, lastKey);
}
Also used : Arrays(java.util.Arrays) DataInputBuffer(org.apache.cassandra.spark.shaded.fourzero.cassandra.io.util.DataInputBuffer) Version(org.apache.cassandra.spark.shaded.fourzero.cassandra.io.sstable.format.Version) FBUtilities.updateChecksumInt(org.apache.cassandra.spark.shaded.fourzero.cassandra.utils.FBUtilities.updateChecksumInt) CustomFilter(org.apache.cassandra.spark.sparksql.filters.CustomFilter) TypeParser(org.apache.cassandra.spark.shaded.fourzero.cassandra.db.marshal.TypeParser) ByteBufferAccessor(org.apache.cassandra.spark.shaded.fourzero.cassandra.db.marshal.ByteBufferAccessor) EncodingStats(org.apache.cassandra.spark.shaded.fourzero.cassandra.db.rows.EncodingStats) ByteBuffer(java.nio.ByteBuffer) DecoratedKey(org.apache.cassandra.spark.shaded.fourzero.cassandra.db.DecoratedKey) AbstractType(org.apache.cassandra.spark.shaded.fourzero.cassandra.db.marshal.AbstractType) PartitionKeyFilter(org.apache.cassandra.spark.sparksql.filters.PartitionKeyFilter) Pair(org.apache.commons.lang3.tuple.Pair) ByteBufUtils(org.apache.cassandra.spark.utils.ByteBufUtils) Map(java.util.Map) CorruptSSTableException(org.apache.cassandra.spark.shaded.fourzero.cassandra.io.sstable.CorruptSSTableException) BigInteger(java.math.BigInteger) SerializationHeader(org.apache.cassandra.spark.shaded.fourzero.cassandra.db.SerializationHeader) EnumSet(java.util.EnumSet) IPartitioner(org.apache.cassandra.spark.shaded.fourzero.cassandra.dht.IPartitioner) EnumMap(java.util.EnumMap) MetadataComponent(org.apache.cassandra.spark.shaded.fourzero.cassandra.io.sstable.metadata.MetadataComponent) BloomFilter(org.apache.cassandra.spark.shaded.fourzero.cassandra.utils.BloomFilter) EOFException(java.io.EOFException) Collectors(java.util.stream.Collectors) FileNotFoundException(java.io.FileNotFoundException) VIntCoding(org.apache.cassandra.spark.shaded.fourzero.cassandra.utils.vint.VIntCoding) InvocationTargetException(java.lang.reflect.InvocationTargetException) List(java.util.List) 
ValidationMetadata(org.apache.cassandra.spark.shaded.fourzero.cassandra.io.sstable.metadata.ValidationMetadata) NotNull(org.jetbrains.annotations.NotNull) BloomFilterSerializer(org.apache.cassandra.spark.shaded.fourzero.cassandra.utils.BloomFilterSerializer) Token(org.apache.cassandra.spark.shaded.fourzero.cassandra.dht.Token) DataInputStream(java.io.DataInputStream) RandomPartitioner(org.apache.cassandra.spark.shaded.fourzero.cassandra.dht.RandomPartitioner) Constructor(java.lang.reflect.Constructor) Function(java.util.function.Function) ArrayList(java.util.ArrayList) LinkedHashMap(java.util.LinkedHashMap) Descriptor(org.apache.cassandra.spark.shaded.fourzero.cassandra.io.sstable.Descriptor) UTF8Type(org.apache.cassandra.spark.shaded.fourzero.cassandra.db.marshal.UTF8Type) Murmur3Partitioner(org.apache.cassandra.spark.shaded.fourzero.cassandra.dht.Murmur3Partitioner) ByteBufferUtil(org.apache.cassandra.spark.shaded.fourzero.cassandra.utils.ByteBufferUtil) ClusteringPrefix(org.apache.cassandra.spark.shaded.fourzero.cassandra.db.ClusteringPrefix) TableMetadata(org.apache.cassandra.spark.shaded.fourzero.cassandra.schema.TableMetadata) IOException(java.io.IOException) Component(org.apache.cassandra.spark.shaded.fourzero.cassandra.io.sstable.Component) MetadataType(org.apache.cassandra.spark.shaded.fourzero.cassandra.io.sstable.metadata.MetadataType) DataInputPlus(org.apache.cassandra.spark.shaded.fourzero.cassandra.io.util.DataInputPlus) DataLayer(org.apache.cassandra.spark.data.DataLayer) CRC32(java.util.zip.CRC32) CompositeType(org.apache.cassandra.spark.shaded.fourzero.cassandra.db.marshal.CompositeType) Collections(java.util.Collections) Clustering(org.apache.cassandra.spark.shaded.fourzero.cassandra.db.Clustering) InputStream(java.io.InputStream) EOFException(java.io.EOFException) DataInputStream(java.io.DataInputStream) ByteBuffer(java.nio.ByteBuffer)

Example 5 with CustomFilter

use of org.apache.cassandra.spark.sparksql.filters.CustomFilter in project spark-cassandra-bulkreader by jberragan.

the class FourZeroUtils method filterKeyInBloomFilter.

/**
 * Returns the subset of {@code filters} for which {@code matchFound} succeeds when checked against
 * the bloom filter that {@code SSTableCache} provides for the given SSTable.
 *
 * @param ssTable     SSTable whose bloom filter is looked up
 * @param partitioner partitioner used to decorate each partition key before the bloom filter lookup
 * @param descriptor  descriptor passed to the bloom filter lookup
 * @param filters     candidate filters
 * @return a new list holding the retained filters, or the {@code filters} argument itself when a
 * {@link FileNotFoundException} is raised during the lookup or evaluation
 * @throws IOException any other I/O failure is propagated unchanged
 */
static List<CustomFilter> filterKeyInBloomFilter(@NotNull final DataLayer.SSTable ssTable, @NotNull final IPartitioner partitioner, final Descriptor descriptor, @NotNull final List<CustomFilter> filters) throws IOException {
    try {
        final BloomFilter bloom = SSTableCache.INSTANCE.bloomFilter(ssTable, descriptor);
        final Function<CustomFilter, Boolean> keyFilterable = candidate -> candidate.canFilterByKey();
        final Function<PartitionKeyFilter, Boolean> presentInBloom = keyFilter -> bloom.isPresent(partitioner.decorateKey(keyFilter.key()));
        final List<CustomFilter> retained = new ArrayList<>();
        for (final CustomFilter candidate : filters) {
            if (candidate.matchFound(keyFilterable, presentInBloom)) {
                retained.add(candidate);
            }
        }
        return retained;
    } catch (Exception e) {
        // only a missing file is tolerated; everything else is rethrown as-is
        if (!(e instanceof FileNotFoundException)) {
            throw e;
        }
        return filters;
    }
}
Also used : Arrays(java.util.Arrays) DataInputBuffer(org.apache.cassandra.spark.shaded.fourzero.cassandra.io.util.DataInputBuffer) Version(org.apache.cassandra.spark.shaded.fourzero.cassandra.io.sstable.format.Version) FBUtilities.updateChecksumInt(org.apache.cassandra.spark.shaded.fourzero.cassandra.utils.FBUtilities.updateChecksumInt) CustomFilter(org.apache.cassandra.spark.sparksql.filters.CustomFilter) TypeParser(org.apache.cassandra.spark.shaded.fourzero.cassandra.db.marshal.TypeParser) ByteBufferAccessor(org.apache.cassandra.spark.shaded.fourzero.cassandra.db.marshal.ByteBufferAccessor) EncodingStats(org.apache.cassandra.spark.shaded.fourzero.cassandra.db.rows.EncodingStats) ByteBuffer(java.nio.ByteBuffer) DecoratedKey(org.apache.cassandra.spark.shaded.fourzero.cassandra.db.DecoratedKey) AbstractType(org.apache.cassandra.spark.shaded.fourzero.cassandra.db.marshal.AbstractType) PartitionKeyFilter(org.apache.cassandra.spark.sparksql.filters.PartitionKeyFilter) Pair(org.apache.commons.lang3.tuple.Pair) ByteBufUtils(org.apache.cassandra.spark.utils.ByteBufUtils) Map(java.util.Map) CorruptSSTableException(org.apache.cassandra.spark.shaded.fourzero.cassandra.io.sstable.CorruptSSTableException) BigInteger(java.math.BigInteger) SerializationHeader(org.apache.cassandra.spark.shaded.fourzero.cassandra.db.SerializationHeader) EnumSet(java.util.EnumSet) IPartitioner(org.apache.cassandra.spark.shaded.fourzero.cassandra.dht.IPartitioner) EnumMap(java.util.EnumMap) MetadataComponent(org.apache.cassandra.spark.shaded.fourzero.cassandra.io.sstable.metadata.MetadataComponent) BloomFilter(org.apache.cassandra.spark.shaded.fourzero.cassandra.utils.BloomFilter) EOFException(java.io.EOFException) Collectors(java.util.stream.Collectors) FileNotFoundException(java.io.FileNotFoundException) VIntCoding(org.apache.cassandra.spark.shaded.fourzero.cassandra.utils.vint.VIntCoding) InvocationTargetException(java.lang.reflect.InvocationTargetException) List(java.util.List) 
ValidationMetadata(org.apache.cassandra.spark.shaded.fourzero.cassandra.io.sstable.metadata.ValidationMetadata) NotNull(org.jetbrains.annotations.NotNull) BloomFilterSerializer(org.apache.cassandra.spark.shaded.fourzero.cassandra.utils.BloomFilterSerializer) Token(org.apache.cassandra.spark.shaded.fourzero.cassandra.dht.Token) DataInputStream(java.io.DataInputStream) RandomPartitioner(org.apache.cassandra.spark.shaded.fourzero.cassandra.dht.RandomPartitioner) Constructor(java.lang.reflect.Constructor) Function(java.util.function.Function) ArrayList(java.util.ArrayList) LinkedHashMap(java.util.LinkedHashMap) Descriptor(org.apache.cassandra.spark.shaded.fourzero.cassandra.io.sstable.Descriptor) UTF8Type(org.apache.cassandra.spark.shaded.fourzero.cassandra.db.marshal.UTF8Type) Murmur3Partitioner(org.apache.cassandra.spark.shaded.fourzero.cassandra.dht.Murmur3Partitioner) ByteBufferUtil(org.apache.cassandra.spark.shaded.fourzero.cassandra.utils.ByteBufferUtil) ClusteringPrefix(org.apache.cassandra.spark.shaded.fourzero.cassandra.db.ClusteringPrefix) TableMetadata(org.apache.cassandra.spark.shaded.fourzero.cassandra.schema.TableMetadata) IOException(java.io.IOException) Component(org.apache.cassandra.spark.shaded.fourzero.cassandra.io.sstable.Component) MetadataType(org.apache.cassandra.spark.shaded.fourzero.cassandra.io.sstable.metadata.MetadataType) DataInputPlus(org.apache.cassandra.spark.shaded.fourzero.cassandra.io.util.DataInputPlus) DataLayer(org.apache.cassandra.spark.data.DataLayer) CRC32(java.util.zip.CRC32) CompositeType(org.apache.cassandra.spark.shaded.fourzero.cassandra.db.marshal.CompositeType) Collections(java.util.Collections) Clustering(org.apache.cassandra.spark.shaded.fourzero.cassandra.db.Clustering) InputStream(java.io.InputStream) CustomFilter(org.apache.cassandra.spark.sparksql.filters.CustomFilter) FileNotFoundException(java.io.FileNotFoundException) BloomFilter(org.apache.cassandra.spark.shaded.fourzero.cassandra.utils.BloomFilter) 
PartitionKeyFilter(org.apache.cassandra.spark.sparksql.filters.PartitionKeyFilter) CorruptSSTableException(org.apache.cassandra.spark.shaded.fourzero.cassandra.io.sstable.CorruptSSTableException) EOFException(java.io.EOFException) FileNotFoundException(java.io.FileNotFoundException) InvocationTargetException(java.lang.reflect.InvocationTargetException) IOException(java.io.IOException)

Aggregations

BigInteger (java.math.BigInteger)12 CustomFilter (org.apache.cassandra.spark.sparksql.filters.CustomFilter)12 ByteBuffer (java.nio.ByteBuffer)10 List (java.util.List)8 PartitionKeyFilter (org.apache.cassandra.spark.sparksql.filters.PartitionKeyFilter)8 SparkRangeFilter (org.apache.cassandra.spark.sparksql.filters.SparkRangeFilter)8 Test (org.junit.Test)8 Function (java.util.function.Function)7 Range (com.google.common.collect.Range)6 Collectors (java.util.stream.Collectors)6 Stats (org.apache.cassandra.spark.stats.Stats)6 Path (java.nio.file.Path)5 ArrayList (java.util.ArrayList)5 Map (java.util.Map)5 TestSchema (org.apache.cassandra.spark.TestSchema)5 TestUtils.runTest (org.apache.cassandra.spark.TestUtils.runTest)5 TableMetadata (org.apache.cassandra.spark.shaded.fourzero.cassandra.schema.TableMetadata)5 InputStream (java.io.InputStream)4 Arrays (java.util.Arrays)4 AtomicBoolean (java.util.concurrent.atomic.AtomicBoolean)4