Use of org.apache.cassandra.io.compress.CompressionMetadata in project cassandra by apache.
The class TableMetrics, method computeCompressionRatio:
/**
 * Computes the compression ratio for the specified SSTables
 *
 * @param sstables the SSTables
 * @return the compression ratio for the specified SSTables
 */
private static Double computeCompressionRatio(Iterable<SSTableReader> sstables) {
    double compressedLengthSum = 0;
    double dataLengthSum = 0;
    for (SSTableReader sstable : sstables) {
        if (sstable.compression) {
            // using SSTableSet.CANONICAL.
            assert sstable.openReason != SSTableReader.OpenReason.EARLY;
            CompressionMetadata compressionMetadata = sstable.getCompressionMetadata();
            compressedLengthSum += compressionMetadata.compressedFileLength;
            dataLengthSum += compressionMetadata.dataLength;
        }
    }
    return dataLengthSum != 0 ? compressedLengthSum / dataLengthSum : MetadataCollector.NO_COMPRESSION_RATIO;
}
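For reference, a minimal standalone sketch of the same arithmetic with hypothetical lengths (plain Java, no Cassandra types); the division-by-zero guard mirrors the MetadataCollector.NO_COMPRESSION_RATIO fallback above:

// Hypothetical values standing in for CompressionMetadata.compressedFileLength / dataLength.
long[] compressedLengths = { 40_000L, 55_000L };
long[] dataLengths = { 100_000L, 110_000L };
double compressedSum = 0, dataSum = 0;
for (int i = 0; i < compressedLengths.length; i++) {
    compressedSum += compressedLengths[i];
    dataSum += dataLengths[i];
}
// -1 stands in here for the "no compression" sentinel when nothing was compressed.
double ratio = dataSum != 0 ? compressedSum / dataSum : -1;
System.out.println(ratio); // ~0.452, i.e. the data compresses to about 45% of its original size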
Use of org.apache.cassandra.io.compress.CompressionMetadata in project cassandra by apache.
The class ScrubTest, method overrideWithGarbage:
static void overrideWithGarbage(SSTableReader sstable, ByteBuffer key1, ByteBuffer key2) throws IOException {
    boolean compression = Boolean.parseBoolean(System.getProperty("cassandra.test.compression", "false"));
    long startPosition, endPosition;
    if (compression) {
        // overwrite with garbage the compression chunks from key1 to key2
        CompressionMetadata compData = CompressionMetadata.create(sstable.getFilename());
        CompressionMetadata.Chunk chunk1 = compData.chunkFor(sstable.getPosition(PartitionPosition.ForKey.get(key1, sstable.getPartitioner()), SSTableReader.Operator.EQ).position);
        CompressionMetadata.Chunk chunk2 = compData.chunkFor(sstable.getPosition(PartitionPosition.ForKey.get(key2, sstable.getPartitioner()), SSTableReader.Operator.EQ).position);
        startPosition = Math.min(chunk1.offset, chunk2.offset);
        endPosition = Math.max(chunk1.offset + chunk1.length, chunk2.offset + chunk2.length);
        compData.close();
    } else {
        // overwrite with garbage from key1 to key2
        long row0Start = sstable.getPosition(PartitionPosition.ForKey.get(key1, sstable.getPartitioner()), SSTableReader.Operator.EQ).position;
        long row1Start = sstable.getPosition(PartitionPosition.ForKey.get(key2, sstable.getPartitioner()), SSTableReader.Operator.EQ).position;
        startPosition = Math.min(row0Start, row1Start);
        endPosition = Math.max(row0Start, row1Start);
    }
    overrideWithGarbage(sstable, startPosition, endPosition);
}
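A standalone sketch of how the corrupted byte range is derived in the compressed branch, using hypothetical chunk offsets and lengths (no Cassandra types involved):

// Hypothetical chunks containing key1 and key2 respectively.
long chunk1Offset = 0L, chunk1Length = 4096L;
long chunk2Offset = 8192L, chunk2Length = 4096L;
long startPosition = Math.min(chunk1Offset, chunk2Offset); // 0
long endPosition = Math.max(chunk1Offset + chunk1Length, chunk2Offset + chunk2Length); // 12288
// Everything in [startPosition, endPosition) is then overwritten with garbage,
// so whole compression chunks are corrupted rather than individual rows.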
Use of org.apache.cassandra.io.compress.CompressionMetadata in project cassandra by apache.
The class SSTableMetadataViewer, method printSStableMetadata:
private void printSStableMetadata(String fname, boolean scan) throws IOException {
    Descriptor descriptor = Descriptor.fromFilename(fname);
    Map<MetadataType, MetadataComponent> metadata = descriptor.getMetadataSerializer().deserialize(descriptor, EnumSet.allOf(MetadataType.class));
    ValidationMetadata validation = (ValidationMetadata) metadata.get(MetadataType.VALIDATION);
    StatsMetadata stats = (StatsMetadata) metadata.get(MetadataType.STATS);
    CompactionMetadata compaction = (CompactionMetadata) metadata.get(MetadataType.COMPACTION);
    CompressionMetadata compression = null;
    File compressionFile = new File(descriptor.filenameFor(Component.COMPRESSION_INFO));
    if (compressionFile.exists())
        compression = CompressionMetadata.create(fname);
    SerializationHeader.Component header = (SerializationHeader.Component) metadata.get(MetadataType.HEADER);
    field("SSTable", descriptor);
    if (scan && descriptor.version.getVersion().compareTo("ma") >= 0) {
        printScannedOverview(descriptor, stats);
    }
    if (validation != null) {
        field("Partitioner", validation.partitioner);
        field("Bloom Filter FP chance", validation.bloomFilterFPChance);
    }
    if (stats != null) {
        field("Minimum timestamp", stats.minTimestamp, toDateString(stats.minTimestamp, tsUnit));
        field("Maximum timestamp", stats.maxTimestamp, toDateString(stats.maxTimestamp, tsUnit));
        field("SSTable min local deletion time", stats.minLocalDeletionTime, deletion(stats.minLocalDeletionTime));
        field("SSTable max local deletion time", stats.maxLocalDeletionTime, deletion(stats.maxLocalDeletionTime));
        field("Compressor", compression != null ? compression.compressor().getClass().getName() : "-");
        if (compression != null)
            field("Compression ratio", stats.compressionRatio);
        field("TTL min", stats.minTTL, toDurationString(stats.minTTL, TimeUnit.SECONDS));
        field("TTL max", stats.maxTTL, toDurationString(stats.maxTTL, TimeUnit.SECONDS));
        if (validation != null && header != null)
            printMinMaxToken(descriptor, FBUtilities.newPartitioner(descriptor), header.getKeyType());
        if (header != null && header.getClusteringTypes().size() == stats.minClusteringValues.size()) {
            List<AbstractType<?>> clusteringTypes = header.getClusteringTypes();
            List<ByteBuffer> minClusteringValues = stats.minClusteringValues;
            List<ByteBuffer> maxClusteringValues = stats.maxClusteringValues;
            String[] minValues = new String[clusteringTypes.size()];
            String[] maxValues = new String[clusteringTypes.size()];
            for (int i = 0; i < clusteringTypes.size(); i++) {
                minValues[i] = clusteringTypes.get(i).getString(minClusteringValues.get(i));
                maxValues[i] = clusteringTypes.get(i).getString(maxClusteringValues.get(i));
            }
            field("minClusteringValues", Arrays.toString(minValues));
            field("maxClusteringValues", Arrays.toString(maxValues));
        }
        field("Estimated droppable tombstones", stats.getEstimatedDroppableTombstoneRatio((int) (currentTimeMillis() / 1000) - this.gc));
        field("SSTable Level", stats.sstableLevel);
        field("Repaired at", stats.repairedAt, toDateString(stats.repairedAt, TimeUnit.MILLISECONDS));
        field("Pending repair", stats.pendingRepair);
        field("Replay positions covered", stats.commitLogIntervals);
        field("totalColumnsSet", stats.totalColumnsSet);
        field("totalRows", stats.totalRows);
        field("Estimated tombstone drop times", "");
        TermHistogram estDropped = new TermHistogram(stats.estimatedTombstoneDropTime, "Drop Time", offset -> String.format("%d %s", offset, Util.wrapQuiet(toDateString(offset, TimeUnit.SECONDS), color)), String::valueOf);
        estDropped.printHistogram(out, color, unicode);
        field("Partition Size", "");
        TermHistogram rowSize = new TermHistogram(stats.estimatedPartitionSize, "Size (bytes)", offset -> String.format("%d %s", offset, Util.wrapQuiet(toByteString(offset), color)), String::valueOf);
        rowSize.printHistogram(out, color, unicode);
        field("Column Count", "");
        TermHistogram cellCount = new TermHistogram(stats.estimatedCellPerPartitionCount, "Columns", String::valueOf, String::valueOf);
        cellCount.printHistogram(out, color, unicode);
    }
    if (compaction != null) {
        field("Estimated cardinality", compaction.cardinalityEstimator.cardinality());
    }
    if (header != null) {
        EncodingStats encodingStats = header.getEncodingStats();
        AbstractType<?> keyType = header.getKeyType();
        List<AbstractType<?>> clusteringTypes = header.getClusteringTypes();
        Map<ByteBuffer, AbstractType<?>> staticColumns = header.getStaticColumns();
        Map<String, String> statics = staticColumns.entrySet().stream().collect(Collectors.toMap(e -> UTF8Type.instance.getString(e.getKey()), e -> e.getValue().toString()));
        Map<ByteBuffer, AbstractType<?>> regularColumns = header.getRegularColumns();
        Map<String, String> regulars = regularColumns.entrySet().stream().collect(Collectors.toMap(e -> UTF8Type.instance.getString(e.getKey()), e -> e.getValue().toString()));
        field("EncodingStats minTTL", encodingStats.minTTL, toDurationString(encodingStats.minTTL, TimeUnit.SECONDS));
        field("EncodingStats minLocalDeletionTime", encodingStats.minLocalDeletionTime, toDateString(encodingStats.minLocalDeletionTime, TimeUnit.SECONDS));
        field("EncodingStats minTimestamp", encodingStats.minTimestamp, toDateString(encodingStats.minTimestamp, tsUnit));
        field("KeyType", keyType.toString());
        field("ClusteringTypes", clusteringTypes.toString());
        field("StaticColumns", FBUtilities.toString(statics));
        field("RegularColumns", FBUtilities.toString(regulars));
        field("IsTransient", stats.isTransient);
    }
}
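A standalone sketch of the cutoff arithmetic behind the "Estimated droppable tombstones" field above, using a hypothetical gc_grace_seconds value in place of this.gc:

int gcGraceSeconds = 864_000; // hypothetical: 10 days
int nowInSeconds = (int) (System.currentTimeMillis() / 1000);
int cutoff = nowInSeconds - gcGraceSeconds;
// Tombstones with a local deletion time older than 'cutoff' are counted as droppable.
System.out.println("droppable-tombstone cutoff: " + cutoff);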
Use of org.apache.cassandra.io.compress.CompressionMetadata in project cassandra by apache.
The class MmappedRegionsTest, method testMapForCompressionMetadata:
@Test
public void testMapForCompressionMetadata() throws Exception {
    int OLD_MAX_SEGMENT_SIZE = MmappedRegions.MAX_SEGMENT_SIZE;
    MmappedRegions.MAX_SEGMENT_SIZE = 1024;
    ByteBuffer buffer = allocateBuffer(128 * 1024);
    File f = FileUtils.createTempFile("testMapForCompressionMetadata", "1");
    f.deleteOnExit();
    File cf = FileUtils.createTempFile(f.name() + ".metadata", "1");
    cf.deleteOnExit();
    MetadataCollector sstableMetadataCollector = new MetadataCollector(new ClusteringComparator(BytesType.instance));
    try (SequentialWriter writer = new CompressedSequentialWriter(f, cf.absolutePath(), null, SequentialWriterOption.DEFAULT, CompressionParams.snappy(), sstableMetadataCollector)) {
        writer.write(buffer);
        writer.finish();
    }
    CompressionMetadata metadata = new CompressionMetadata(cf.absolutePath(), f.length(), true);
    try (ChannelProxy channel = new ChannelProxy(f);
         MmappedRegions regions = MmappedRegions.map(channel, metadata)) {
        assertFalse(regions.isEmpty());
        int i = 0;
        while (i < buffer.capacity()) {
            CompressionMetadata.Chunk chunk = metadata.chunkFor(i);
            MmappedRegions.Region region = regions.floor(chunk.offset);
            assertNotNull(region);
            ByteBuffer compressedChunk = region.buffer.duplicate();
            assertNotNull(compressedChunk);
            assertEquals(chunk.length + 4, compressedChunk.capacity());
            assertEquals(chunk.offset, region.offset());
            assertEquals(chunk.offset + chunk.length + 4, region.end());
            i += metadata.chunkLength();
        }
    } finally {
        MmappedRegions.MAX_SEGMENT_SIZE = OLD_MAX_SEGMENT_SIZE;
        metadata.close();
    }
}
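A standalone sketch of the region bounds the assertions above expect, for one hypothetical chunk; the extra 4 bytes are the per-chunk checksum written after the compressed payload:

long chunkOffset = 1024L; // hypothetical on-disk offset of the compressed chunk
int chunkLength = 998;    // hypothetical compressed payload size
int checksumBytes = 4;    // checksum appended after every chunk
long regionStart = chunkOffset;
long regionEnd = chunkOffset + chunkLength + checksumBytes; // 2026
System.out.println("region covers [" + regionStart + ", " + regionEnd + ")");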
Use of org.apache.cassandra.io.compress.CompressionMetadata in project cassandra by apache.
The class CompressedInputStreamTest, method testCompressedReadWith:
/**
 * @param valuesToCheck array of longs, each in the range 0-999
 * @throws Exception
 */
private void testCompressedReadWith(long[] valuesToCheck, boolean testTruncate, boolean testException, double minCompressRatio) throws Exception {
    assert valuesToCheck != null && valuesToCheck.length > 0;
    // write compressed data file of longs
    File parentDir = new File(tempFolder.newFolder());
    Descriptor desc = new Descriptor(parentDir, "ks", "cf", 1);
    File tmp = new File(desc.filenameFor(Component.DATA));
    MetadataCollector collector = new MetadataCollector(new ClusteringComparator(BytesType.instance));
    CompressionParams param = CompressionParams.snappy(32, minCompressRatio);
    Map<Long, Long> index = new HashMap<Long, Long>();
    try (CompressedSequentialWriter writer = new CompressedSequentialWriter(tmp, desc.filenameFor(Component.COMPRESSION_INFO), null, SequentialWriterOption.DEFAULT, param, collector)) {
        for (long l = 0L; l < 1000; l++) {
            index.put(l, writer.position());
            writer.writeLong(l);
        }
        writer.finish();
    }
    CompressionMetadata comp = CompressionMetadata.create(tmp.absolutePath());
    List<SSTableReader.PartitionPositionBounds> sections = new ArrayList<>();
    for (long l : valuesToCheck) {
        long position = index.get(l);
        sections.add(new SSTableReader.PartitionPositionBounds(position, position + 8));
    }
    CompressionMetadata.Chunk[] chunks = comp.getChunksForSections(sections);
    long totalSize = comp.getTotalSizeForSections(sections);
    long expectedSize = 0;
    for (CompressionMetadata.Chunk c : chunks)
        expectedSize += c.length + 4;
    assertEquals(expectedSize, totalSize);
    // buffer up only relevant parts of file
    int size = 0;
    for (CompressionMetadata.Chunk c : chunks) // 4bytes CRC
        size += (c.length + 4);
    byte[] toRead = new byte[size];
    try (RandomAccessReader f = RandomAccessReader.open(tmp)) {
        int pos = 0;
        for (CompressionMetadata.Chunk c : chunks) {
            f.seek(c.offset);
            pos += f.read(toRead, pos, c.length + 4);
        }
    }
    if (testTruncate) {
        byte[] actuallyRead = new byte[50];
        System.arraycopy(toRead, 0, actuallyRead, 0, 50);
        toRead = actuallyRead;
    }
    // read buffer using CompressedInputStream
    CompressionInfo info = CompressionInfo.newInstance(chunks, param);
    if (testException) {
        testException(sections, info);
        return;
    }
    CompressedInputStream input = new CompressedInputStream(new DataInputStreamPlus(new ByteArrayInputStream(toRead)), info, ChecksumType.CRC32, () -> 1.0);
    try (DataInputStream in = new DataInputStream(input)) {
        for (int i = 0; i < sections.size(); i++) {
            input.position(sections.get(i).lowerPosition);
            long readValue = in.readLong();
            assertEquals("expected " + valuesToCheck[i] + " but was " + readValue, valuesToCheck[i], readValue);
        }
    }
}
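A standalone sketch of the size check performed above, with hypothetical chunk lengths; on disk each compressed chunk is followed by a 4-byte CRC, hence the "+ 4" per chunk:

int[] chunkLengths = { 120, 95, 130 }; // hypothetical compressed chunk sizes
long expectedSize = 0;
for (int length : chunkLengths)
    expectedSize += length + 4;        // payload plus 4-byte checksum
System.out.println(expectedSize);      // 357, what getTotalSizeForSections(...) should report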