Example 1 with CompressionMetadata

Use of org.apache.cassandra.io.compress.CompressionMetadata in project cassandra by apache.

The class TableMetrics, method computeCompressionRatio.

/**
 * Computes the compression ratio for the specified SSTables
 *
 * @param sstables the SSTables
 * @return the compression ratio for the specified SSTables
 */
private static Double computeCompressionRatio(Iterable<SSTableReader> sstables) {
    double compressedLengthSum = 0;
    double dataLengthSum = 0;
    for (SSTableReader sstable : sstables) {
        if (sstable.compression) {
            // using SSTableSet.CANONICAL.
            assert sstable.openReason != SSTableReader.OpenReason.EARLY;
            CompressionMetadata compressionMetadata = sstable.getCompressionMetadata();
            compressedLengthSum += compressionMetadata.compressedFileLength;
            dataLengthSum += compressionMetadata.dataLength;
        }
    }
    return dataLengthSum != 0 ? compressedLengthSum / dataLengthSum : MetadataCollector.NO_COMPRESSION_RATIO;
}
Also used : SSTableReader(org.apache.cassandra.io.sstable.format.SSTableReader) CompressionMetadata(org.apache.cassandra.io.compress.CompressionMetadata)
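
The ratio returned above is compressedLengthSum / dataLengthSum, so values below 1.0 mean the compressed SSTables take less space on disk than the raw data, while MetadataCollector.NO_COMPRESSION_RATIO (a negative sentinel) signals that no compressed SSTable was seen. A minimal sketch of a hypothetical helper (not part of Cassandra) that turns the ratio into a readable figure:

// Hypothetical helper, not part of Cassandra: render the ratio computed by
// computeCompressionRatio above. Any negative value is treated as the
// NO_COMPRESSION_RATIO sentinel (no compressed SSTables were encountered).
static String describeCompressionRatio(double ratio)
{
    if (ratio < 0)
        return "not compressed";
    return String.format("%.1f%% of original size (%.1f%% saved)", ratio * 100, (1 - ratio) * 100);
}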

Example 2 with CompressionMetadata

Use of org.apache.cassandra.io.compress.CompressionMetadata in project cassandra by apache.

The class ScrubTest, method overrideWithGarbage.

static void overrideWithGarbage(SSTableReader sstable, ByteBuffer key1, ByteBuffer key2) throws IOException {
    boolean compression = Boolean.parseBoolean(System.getProperty("cassandra.test.compression", "false"));
    long startPosition, endPosition;
    if (compression) {
        // overwrite with garbage the compression chunks from key1 to key2
        CompressionMetadata compData = CompressionMetadata.create(sstable.getFilename());
        CompressionMetadata.Chunk chunk1 = compData.chunkFor(sstable.getPosition(PartitionPosition.ForKey.get(key1, sstable.getPartitioner()), SSTableReader.Operator.EQ).position);
        CompressionMetadata.Chunk chunk2 = compData.chunkFor(sstable.getPosition(PartitionPosition.ForKey.get(key2, sstable.getPartitioner()), SSTableReader.Operator.EQ).position);
        startPosition = Math.min(chunk1.offset, chunk2.offset);
        endPosition = Math.max(chunk1.offset + chunk1.length, chunk2.offset + chunk2.length);
        compData.close();
    } else {
        // overwrite with garbage from key1 to key2
        long row0Start = sstable.getPosition(PartitionPosition.ForKey.get(key1, sstable.getPartitioner()), SSTableReader.Operator.EQ).position;
        long row1Start = sstable.getPosition(PartitionPosition.ForKey.get(key2, sstable.getPartitioner()), SSTableReader.Operator.EQ).position;
        startPosition = Math.min(row0Start, row1Start);
        endPosition = Math.max(row0Start, row1Start);
    }
    overrideWithGarbage(sstable, startPosition, endPosition);
}
Also used : CompressionMetadata(org.apache.cassandra.io.compress.CompressionMetadata)
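
The piece that makes the compressed branch work is CompressionMetadata.chunkFor(position): it maps an offset in the uncompressed data stream to the on-disk chunk that holds it. A minimal sketch, as a hypothetical helper built only from calls used in this example, of that translation for a single position:

// Hypothetical helper: translate an uncompressed data position into the byte
// range of the compressed data file that backs it; this is the same range the
// test computes for key1 and key2 before garbling it. Chunk.offset and
// Chunk.length give the chunk's location and compressed size on disk.
static long[] compressedRangeFor(CompressionMetadata metadata, long uncompressedPosition)
{
    CompressionMetadata.Chunk chunk = metadata.chunkFor(uncompressedPosition);
    return new long[] { chunk.offset, chunk.offset + chunk.length };
}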

Example 3 with CompressionMetadata

Use of org.apache.cassandra.io.compress.CompressionMetadata in project cassandra by apache.

The class SSTableMetadataViewer, method printSStableMetadata.

private void printSStableMetadata(String fname, boolean scan) throws IOException {
    Descriptor descriptor = Descriptor.fromFilename(fname);
    Map<MetadataType, MetadataComponent> metadata = descriptor.getMetadataSerializer().deserialize(descriptor, EnumSet.allOf(MetadataType.class));
    ValidationMetadata validation = (ValidationMetadata) metadata.get(MetadataType.VALIDATION);
    StatsMetadata stats = (StatsMetadata) metadata.get(MetadataType.STATS);
    CompactionMetadata compaction = (CompactionMetadata) metadata.get(MetadataType.COMPACTION);
    CompressionMetadata compression = null;
    File compressionFile = new File(descriptor.filenameFor(Component.COMPRESSION_INFO));
    if (compressionFile.exists())
        compression = CompressionMetadata.create(fname);
    SerializationHeader.Component header = (SerializationHeader.Component) metadata.get(MetadataType.HEADER);
    field("SSTable", descriptor);
    if (scan && descriptor.version.getVersion().compareTo("ma") >= 0) {
        printScannedOverview(descriptor, stats);
    }
    if (validation != null) {
        field("Partitioner", validation.partitioner);
        field("Bloom Filter FP chance", validation.bloomFilterFPChance);
    }
    if (stats != null) {
        field("Minimum timestamp", stats.minTimestamp, toDateString(stats.minTimestamp, tsUnit));
        field("Maximum timestamp", stats.maxTimestamp, toDateString(stats.maxTimestamp, tsUnit));
        field("SSTable min local deletion time", stats.minLocalDeletionTime, deletion(stats.minLocalDeletionTime));
        field("SSTable max local deletion time", stats.maxLocalDeletionTime, deletion(stats.maxLocalDeletionTime));
        field("Compressor", compression != null ? compression.compressor().getClass().getName() : "-");
        if (compression != null)
            field("Compression ratio", stats.compressionRatio);
        field("TTL min", stats.minTTL, toDurationString(stats.minTTL, TimeUnit.SECONDS));
        field("TTL max", stats.maxTTL, toDurationString(stats.maxTTL, TimeUnit.SECONDS));
        if (validation != null && header != null)
            printMinMaxToken(descriptor, FBUtilities.newPartitioner(descriptor), header.getKeyType());
        if (header != null && header.getClusteringTypes().size() == stats.minClusteringValues.size()) {
            List<AbstractType<?>> clusteringTypes = header.getClusteringTypes();
            List<ByteBuffer> minClusteringValues = stats.minClusteringValues;
            List<ByteBuffer> maxClusteringValues = stats.maxClusteringValues;
            String[] minValues = new String[clusteringTypes.size()];
            String[] maxValues = new String[clusteringTypes.size()];
            for (int i = 0; i < clusteringTypes.size(); i++) {
                minValues[i] = clusteringTypes.get(i).getString(minClusteringValues.get(i));
                maxValues[i] = clusteringTypes.get(i).getString(maxClusteringValues.get(i));
            }
            field("minClusteringValues", Arrays.toString(minValues));
            field("maxClusteringValues", Arrays.toString(maxValues));
        }
        field("Estimated droppable tombstones", stats.getEstimatedDroppableTombstoneRatio((int) (currentTimeMillis() / 1000) - this.gc));
        field("SSTable Level", stats.sstableLevel);
        field("Repaired at", stats.repairedAt, toDateString(stats.repairedAt, TimeUnit.MILLISECONDS));
        field("Pending repair", stats.pendingRepair);
        field("Replay positions covered", stats.commitLogIntervals);
        field("totalColumnsSet", stats.totalColumnsSet);
        field("totalRows", stats.totalRows);
        field("Estimated tombstone drop times", "");
        TermHistogram estDropped = new TermHistogram(stats.estimatedTombstoneDropTime, "Drop Time", offset -> String.format("%d %s", offset, Util.wrapQuiet(toDateString(offset, TimeUnit.SECONDS), color)), String::valueOf);
        estDropped.printHistogram(out, color, unicode);
        field("Partition Size", "");
        TermHistogram rowSize = new TermHistogram(stats.estimatedPartitionSize, "Size (bytes)", offset -> String.format("%d %s", offset, Util.wrapQuiet(toByteString(offset), color)), String::valueOf);
        rowSize.printHistogram(out, color, unicode);
        field("Column Count", "");
        TermHistogram cellCount = new TermHistogram(stats.estimatedCellPerPartitionCount, "Columns", String::valueOf, String::valueOf);
        cellCount.printHistogram(out, color, unicode);
    }
    if (compaction != null) {
        field("Estimated cardinality", compaction.cardinalityEstimator.cardinality());
    }
    if (header != null) {
        EncodingStats encodingStats = header.getEncodingStats();
        AbstractType<?> keyType = header.getKeyType();
        List<AbstractType<?>> clusteringTypes = header.getClusteringTypes();
        Map<ByteBuffer, AbstractType<?>> staticColumns = header.getStaticColumns();
        Map<String, String> statics = staticColumns.entrySet().stream().collect(Collectors.toMap(e -> UTF8Type.instance.getString(e.getKey()), e -> e.getValue().toString()));
        Map<ByteBuffer, AbstractType<?>> regularColumns = header.getRegularColumns();
        Map<String, String> regulars = regularColumns.entrySet().stream().collect(Collectors.toMap(e -> UTF8Type.instance.getString(e.getKey()), e -> e.getValue().toString()));
        field("EncodingStats minTTL", encodingStats.minTTL, toDurationString(encodingStats.minTTL, TimeUnit.SECONDS));
        field("EncodingStats minLocalDeletionTime", encodingStats.minLocalDeletionTime, toDateString(encodingStats.minLocalDeletionTime, TimeUnit.SECONDS));
        field("EncodingStats minTimestamp", encodingStats.minTimestamp, toDateString(encodingStats.minTimestamp, tsUnit));
        field("KeyType", keyType.toString());
        field("ClusteringTypes", clusteringTypes.toString());
        field("StaticColumns", FBUtilities.toString(statics));
        field("RegularColumns", FBUtilities.toString(regulars));
        field("IsTransient", stats.isTransient);
    }
}
Also used : Arrays(java.util.Arrays) CompressionMetadata(org.apache.cassandra.io.compress.CompressionMetadata) File(org.apache.cassandra.io.util.File) RESET(org.apache.cassandra.tools.Util.RESET) AbstractType(org.apache.cassandra.db.marshal.AbstractType) WHITE(org.apache.cassandra.tools.Util.WHITE) ByteBuffer(java.nio.ByteBuffer) SSTableReader(org.apache.cassandra.io.sstable.format.SSTableReader) DecoratedKey(org.apache.cassandra.db.DecoratedKey) Pair(org.apache.cassandra.utils.Pair) Map(java.util.Map) DatabaseDescriptor(org.apache.cassandra.config.DatabaseDescriptor) EnumSet(java.util.EnumSet) CYAN(org.apache.cassandra.tools.Util.CYAN) PrintWriter(java.io.PrintWriter) FBUtilities(org.apache.cassandra.utils.FBUtilities) MetadataType(org.apache.cassandra.io.sstable.metadata.MetadataType) BLUE(org.apache.cassandra.tools.Util.BLUE) Collectors(java.util.stream.Collectors) List(java.util.List) ValidationMetadata(org.apache.cassandra.io.sstable.metadata.ValidationMetadata) TableMetadataRef(org.apache.cassandra.schema.TableMetadataRef) ParseException(org.apache.commons.cli.ParseException) TableMetadata(org.apache.cassandra.schema.TableMetadata) CompactionMetadata(org.apache.cassandra.io.sstable.metadata.CompactionMetadata) StatsMetadata(org.apache.cassandra.io.sstable.metadata.StatsMetadata) DataInputStream(java.io.DataInputStream) Unfiltered(org.apache.cassandra.db.rows.Unfiltered) Global.currentTimeMillis(org.apache.cassandra.utils.Clock.Global.currentTimeMillis) Options(org.apache.commons.cli.Options) DurationFormatUtils.formatDurationWords(org.apache.commons.lang3.time.DurationFormatUtils.formatDurationWords) HelpFormatter(org.apache.commons.cli.HelpFormatter) UTF8Type(org.apache.cassandra.db.marshal.UTF8Type) Row(org.apache.cassandra.db.rows.Row) UnfilteredRowIterator(org.apache.cassandra.db.rows.UnfilteredRowIterator) Component(org.apache.cassandra.io.sstable.Component) MetadataComponent(org.apache.cassandra.io.sstable.metadata.MetadataComponent) Descriptor(org.apache.cassandra.io.sstable.Descriptor) CommandLine(org.apache.commons.cli.CommandLine) PosixParser(org.apache.commons.cli.PosixParser) SerializationHeader(org.apache.cassandra.db.SerializationHeader) Option(org.apache.commons.cli.Option) PrintStream(java.io.PrintStream) Files(java.nio.file.Files) CommandLineParser(org.apache.commons.cli.CommandLineParser) ISSTableScanner(org.apache.cassandra.io.sstable.ISSTableScanner) IOException(java.io.IOException) MinMaxPriorityQueue(com.google.common.collect.MinMaxPriorityQueue) TimeUnit(java.util.concurrent.TimeUnit) IPartitioner(org.apache.cassandra.dht.IPartitioner) EncodingStats(org.apache.cassandra.db.rows.EncodingStats) TermHistogram(org.apache.cassandra.tools.Util.TermHistogram) Comparator(java.util.Comparator) IndexSummary(org.apache.cassandra.io.sstable.IndexSummary)
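
Only the compressor name in the output above actually comes from CompressionMetadata; the remaining fields are read from the STATS, VALIDATION, COMPACTION and HEADER components. A standalone sketch of just the compression lookup, as a hypothetical helper reusing the Descriptor, Component and CompressionMetadata calls from the method above:

// Hypothetical helper: report compressor, chunk size, and uncompressed length
// for an SSTable, or "-" when the CompressionInfo component is absent.
static void printCompression(Descriptor descriptor) throws IOException
{
    File compressionFile = new File(descriptor.filenameFor(Component.COMPRESSION_INFO));
    if (!compressionFile.exists())
    {
        System.out.println("Compressor: -");
        return;
    }
    CompressionMetadata compression = CompressionMetadata.create(descriptor.filenameFor(Component.DATA));
    try
    {
        System.out.println("Compressor: " + compression.compressor().getClass().getName());
        System.out.println("Chunk length: " + compression.chunkLength() + " bytes");
        System.out.println("Uncompressed length: " + compression.dataLength + " bytes");
    }
    finally
    {
        compression.close();
    }
}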

Example 4 with CompressionMetadata

Use of org.apache.cassandra.io.compress.CompressionMetadata in project cassandra by apache.

The class MmappedRegionsTest, method testMapForCompressionMetadata.

@Test
public void testMapForCompressionMetadata() throws Exception {
    int OLD_MAX_SEGMENT_SIZE = MmappedRegions.MAX_SEGMENT_SIZE;
    MmappedRegions.MAX_SEGMENT_SIZE = 1024;
    ByteBuffer buffer = allocateBuffer(128 * 1024);
    File f = FileUtils.createTempFile("testMapForCompressionMetadata", "1");
    f.deleteOnExit();
    File cf = FileUtils.createTempFile(f.name() + ".metadata", "1");
    cf.deleteOnExit();
    MetadataCollector sstableMetadataCollector = new MetadataCollector(new ClusteringComparator(BytesType.instance));
    try (SequentialWriter writer = new CompressedSequentialWriter(f, cf.absolutePath(), null, SequentialWriterOption.DEFAULT, CompressionParams.snappy(), sstableMetadataCollector)) {
        writer.write(buffer);
        writer.finish();
    }
    CompressionMetadata metadata = new CompressionMetadata(cf.absolutePath(), f.length(), true);
    try (ChannelProxy channel = new ChannelProxy(f);
        MmappedRegions regions = MmappedRegions.map(channel, metadata)) {
        assertFalse(regions.isEmpty());
        int i = 0;
        while (i < buffer.capacity()) {
            CompressionMetadata.Chunk chunk = metadata.chunkFor(i);
            MmappedRegions.Region region = regions.floor(chunk.offset);
            assertNotNull(region);
            ByteBuffer compressedChunk = region.buffer.duplicate();
            assertNotNull(compressedChunk);
            assertEquals(chunk.length + 4, compressedChunk.capacity());
            assertEquals(chunk.offset, region.offset());
            assertEquals(chunk.offset + chunk.length + 4, region.end());
            i += metadata.chunkLength();
        }
    } finally {
        MmappedRegions.MAX_SEGMENT_SIZE = OLD_MAX_SEGMENT_SIZE;
        metadata.close();
    }
}
Also used : CompressedSequentialWriter(org.apache.cassandra.io.compress.CompressedSequentialWriter) CompressionMetadata(org.apache.cassandra.io.compress.CompressionMetadata) ClusteringComparator(org.apache.cassandra.db.ClusteringComparator) CompressedSequentialWriter(org.apache.cassandra.io.compress.CompressedSequentialWriter) MetadataCollector(org.apache.cassandra.io.sstable.metadata.MetadataCollector) ByteBuffer(java.nio.ByteBuffer) Test(org.junit.Test)
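
The recurring + 4 in the assertions above is the 4-byte checksum written after every compressed chunk, so each chunk occupies chunk.length + 4 bytes on disk. A small sketch (hypothetical helper, using only fields and methods shown in these examples) that walks every chunk and sums that footprint, which should roughly match compressedFileLength from Example 1:

// Hypothetical helper: total on-disk footprint of all chunks (compressed
// bytes plus the 4-byte CRC per chunk) for the data file described by
// the given metadata.
static long onDiskChunkSize(CompressionMetadata metadata)
{
    long total = 0;
    for (long pos = 0; pos < metadata.dataLength; pos += metadata.chunkLength())
        total += metadata.chunkFor(pos).length + 4;
    return total;
}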

Example 5 with CompressionMetadata

Use of org.apache.cassandra.io.compress.CompressionMetadata in project cassandra by apache.

The class CompressedInputStreamTest, method testCompressedReadWith.

/**
 * @param valuesToCheck array of longs, each in the range 0-999, identifying which of the written values to read back and verify
 * @throws Exception
 */
private void testCompressedReadWith(long[] valuesToCheck, boolean testTruncate, boolean testException, double minCompressRatio) throws Exception {
    assert valuesToCheck != null && valuesToCheck.length > 0;
    // write compressed data file of longs
    File parentDir = new File(tempFolder.newFolder());
    Descriptor desc = new Descriptor(parentDir, "ks", "cf", 1);
    File tmp = new File(desc.filenameFor(Component.DATA));
    MetadataCollector collector = new MetadataCollector(new ClusteringComparator(BytesType.instance));
    CompressionParams param = CompressionParams.snappy(32, minCompressRatio);
    Map<Long, Long> index = new HashMap<Long, Long>();
    try (CompressedSequentialWriter writer = new CompressedSequentialWriter(tmp, desc.filenameFor(Component.COMPRESSION_INFO), null, SequentialWriterOption.DEFAULT, param, collector)) {
        for (long l = 0L; l < 1000; l++) {
            index.put(l, writer.position());
            writer.writeLong(l);
        }
        writer.finish();
    }
    CompressionMetadata comp = CompressionMetadata.create(tmp.absolutePath());
    List<SSTableReader.PartitionPositionBounds> sections = new ArrayList<>();
    for (long l : valuesToCheck) {
        long position = index.get(l);
        sections.add(new SSTableReader.PartitionPositionBounds(position, position + 8));
    }
    CompressionMetadata.Chunk[] chunks = comp.getChunksForSections(sections);
    long totalSize = comp.getTotalSizeForSections(sections);
    long expectedSize = 0;
    for (CompressionMetadata.Chunk c : chunks) expectedSize += c.length + 4;
    assertEquals(expectedSize, totalSize);
    // buffer up only relevant parts of file
    int size = 0;
    // each chunk is followed by a 4-byte CRC on disk
    for (CompressionMetadata.Chunk c : chunks)
        size += (c.length + 4);
    byte[] toRead = new byte[size];
    try (RandomAccessReader f = RandomAccessReader.open(tmp)) {
        int pos = 0;
        for (CompressionMetadata.Chunk c : chunks) {
            f.seek(c.offset);
            pos += f.read(toRead, pos, c.length + 4);
        }
    }
    if (testTruncate) {
        byte[] actuallyRead = new byte[50];
        System.arraycopy(toRead, 0, actuallyRead, 0, 50);
        toRead = actuallyRead;
    }
    // read buffer using CompressedInputStream
    CompressionInfo info = CompressionInfo.newInstance(chunks, param);
    if (testException) {
        testException(sections, info);
        return;
    }
    CompressedInputStream input = new CompressedInputStream(new DataInputStreamPlus(new ByteArrayInputStream(toRead)), info, ChecksumType.CRC32, () -> 1.0);
    try (DataInputStream in = new DataInputStream(input)) {
        for (int i = 0; i < sections.size(); i++) {
            input.position(sections.get(i).lowerPosition);
            long readValue = in.readLong();
            assertEquals("expected " + valuesToCheck[i] + " but was " + readValue, valuesToCheck[i], readValue);
        }
    }
}
Also used : ClusteringComparator(org.apache.cassandra.db.ClusteringComparator) CompressionInfo(org.apache.cassandra.db.streaming.CompressionInfo) SSTableReader(org.apache.cassandra.io.sstable.format.SSTableReader) RandomAccessReader(org.apache.cassandra.io.util.RandomAccessReader) DataInputStreamPlus(org.apache.cassandra.io.util.DataInputPlus.DataInputStreamPlus) CompressedSequentialWriter(org.apache.cassandra.io.compress.CompressedSequentialWriter) CompressionMetadata(org.apache.cassandra.io.compress.CompressionMetadata) DataInputStream(java.io.DataInputStream) CompressionParams(org.apache.cassandra.schema.CompressionParams) CompressedInputStream(org.apache.cassandra.db.streaming.CompressedInputStream) ByteArrayInputStream(java.io.ByteArrayInputStream) Descriptor(org.apache.cassandra.io.sstable.Descriptor) DatabaseDescriptor(org.apache.cassandra.config.DatabaseDescriptor) MetadataCollector(org.apache.cassandra.io.sstable.metadata.MetadataCollector) File(org.apache.cassandra.io.util.File)
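
getChunksForSections and getTotalSizeForSections are the streaming-oriented calls exercised here: given ranges of uncompressed positions, they return the chunks that have to be shipped and the byte count including each chunk's 4-byte checksum. A compact sketch of that lookup on its own (hypothetical helper; assumes java.util.Collections and java.util.List in addition to the imports listed above):

// Hypothetical helper: report how many compressed chunks cover a single
// uncompressed range [start, end) and how many bytes a transfer of that range
// would carry (chunk bodies plus 4-byte CRCs), as asserted in the test above.
static void describeTransfer(CompressionMetadata comp, long start, long end)
{
    List<SSTableReader.PartitionPositionBounds> sections =
        Collections.singletonList(new SSTableReader.PartitionPositionBounds(start, end));
    CompressionMetadata.Chunk[] chunks = comp.getChunksForSections(sections);
    System.out.printf("%d chunk(s), %d bytes to transfer%n", chunks.length, comp.getTotalSizeForSections(sections));
}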

Aggregations

CompressionMetadata (org.apache.cassandra.io.compress.CompressionMetadata) 5
SSTableReader (org.apache.cassandra.io.sstable.format.SSTableReader) 3
DataInputStream (java.io.DataInputStream) 2
ByteBuffer (java.nio.ByteBuffer) 2
DatabaseDescriptor (org.apache.cassandra.config.DatabaseDescriptor) 2
ClusteringComparator (org.apache.cassandra.db.ClusteringComparator) 2
CompressedSequentialWriter (org.apache.cassandra.io.compress.CompressedSequentialWriter) 2
Descriptor (org.apache.cassandra.io.sstable.Descriptor) 2
MetadataCollector (org.apache.cassandra.io.sstable.metadata.MetadataCollector) 2
File (org.apache.cassandra.io.util.File) 2
MinMaxPriorityQueue (com.google.common.collect.MinMaxPriorityQueue) 1
ByteArrayInputStream (java.io.ByteArrayInputStream) 1
IOException (java.io.IOException) 1
PrintStream (java.io.PrintStream) 1
PrintWriter (java.io.PrintWriter) 1
Files (java.nio.file.Files) 1
Arrays (java.util.Arrays) 1
Comparator (java.util.Comparator) 1
EnumSet (java.util.EnumSet) 1
List (java.util.List) 1