Use of org.apache.cassandra.io.util.FileDataInput in project cassandra by apache.
The class CompactionController, method getShadowIterator.
// the caller is responsible for closing the returned iterator
@SuppressWarnings("resource")
private UnfilteredRowIterator getShadowIterator(SSTableReader reader, DecoratedKey key, boolean tombstoneOnly) {
    if (reader.isMarkedSuspect() || reader.getMaxTimestamp() <= minTimestamp || (tombstoneOnly && !reader.hasTombstones()))
        return null;
    RowIndexEntry<?> position = reader.getPosition(key, SSTableReader.Operator.EQ);
    if (position == null)
        return null;
    FileDataInput dfile = openDataFiles.computeIfAbsent(reader, this::openDataFile);
    return reader.simpleIterator(dfile, key, position, tombstoneOnly);
}
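Because the method transfers ownership of the iterator to its caller (hence the @SuppressWarnings("resource") annotation), a caller would typically drain it inside try-with-resources: UnfilteredRowIterator extends AutoCloseable, and a null resource is simply skipped by the construct. A minimal, hypothetical caller sketch (the loop body is illustrative, not actual CompactionController code):

// Hypothetical caller: consume the shadow iterator, then close it.
try (UnfilteredRowIterator shadow = getShadowIterator(reader, key, true)) {
    if (shadow != null) {
        while (shadow.hasNext()) {
            Unfiltered unfiltered = shadow.next();
            // examine tombstones that may shadow data being compacted...
        }
    }
}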
Use of org.apache.cassandra.io.util.FileDataInput in project cassandra by apache.
The class BigTableReader, method getPosition.
/**
 * @param key The key to apply as the rhs to the given Operator. A 'fake' key is allowed, to
 *            permit key selection by token bounds, but only if op != EQ
 * @param op The Operator defining matching keys: the nearest key to the target matching the operator wins.
 * @param updateCacheAndStats true if updating stats and cache
 * @return The index entry corresponding to the key, or null if the key is not present
 */
protected RowIndexEntry getPosition(PartitionPosition key, Operator op, boolean updateCacheAndStats, boolean permitMatchPastLast) {
    if (op == Operator.EQ) {
        // EQ only makes sense if the key is a valid row key
        assert key instanceof DecoratedKey;
        if (!bf.isPresent((DecoratedKey) key)) {
            Tracing.trace("Bloom filter allows skipping sstable {}", descriptor.generation);
            return null;
        }
    }
    // next, the key cache (only makes sense for a valid row key)
    if ((op == Operator.EQ || op == Operator.GE) && (key instanceof DecoratedKey)) {
        DecoratedKey decoratedKey = (DecoratedKey) key;
        KeyCacheKey cacheKey = new KeyCacheKey(metadata(), descriptor, decoratedKey.getKey());
        RowIndexEntry cachedPosition = getCachedPosition(cacheKey, updateCacheAndStats);
        if (cachedPosition != null) {
            Tracing.trace("Key cache hit for sstable {}", descriptor.generation);
            return cachedPosition;
        }
    }
    // check the smallest and greatest keys in the sstable to see if the key can be present at all
    boolean skip = false;
    if (key.compareTo(first) < 0) {
        if (op == Operator.EQ)
            skip = true;
        else
            key = first;
        op = Operator.EQ;
    } else {
        int l = last.compareTo(key);
        // l <= 0 => we may be looking past the end of the file; we then narrow our behaviour to:
        //   1) skipping if strictly greater for GE and EQ;
        //   2) skipping if equal and searching GT, and we aren't permitting matching past last
        skip = l <= 0 && (l < 0 || (!permitMatchPastLast && op == Operator.GT));
    }
    if (skip) {
        if (op == Operator.EQ && updateCacheAndStats)
            bloomFilterTracker.addFalsePositive();
        Tracing.trace("Check against min and max keys allows skipping sstable {}", descriptor.generation);
        return null;
    }
    int binarySearchResult = indexSummary.binarySearch(key);
    long sampledPosition = getIndexScanPositionFromBinarySearchResult(binarySearchResult, indexSummary);
    int sampledIndex = getIndexSummaryIndexFromBinarySearchResult(binarySearchResult);
    int effectiveInterval = indexSummary.getEffectiveIndexIntervalAfterIndex(sampledIndex);
    if (ifile == null)
        return null;
    // Scan the on-disk index, starting at the nearest sampled position.
    // The check against the index interval is to exit the loop in the EQ case when the key looked for is not
    // present (bloom filter false positive). But note that for non-EQ cases, we might need to check the first
    // key of the next index position, because the searched key can be greater than the last key of the index
    // interval checked if it is less than the first key of the next interval (and in that case we must return
    // the position of the first key of the next interval).
    int i = 0;
    String path = null;
    try (FileDataInput in = ifile.createReader(sampledPosition)) {
        path = in.getPath();
        while (!in.isEOF()) {
            i++;
            ByteBuffer indexKey = ByteBufferUtil.readWithShortLength(in);
            // did we find an appropriate position for the op requested?
            boolean opSatisfied;
            // is the current position an exact match for the key, suitable for caching?
            boolean exactMatch;
            // Compare raw keys if possible for performance, otherwise compare decorated keys.
            if (op == Operator.EQ && i <= effectiveInterval) {
                opSatisfied = exactMatch = indexKey.equals(((DecoratedKey) key).getKey());
            } else {
                DecoratedKey indexDecoratedKey = decorateKey(indexKey);
                int comparison = indexDecoratedKey.compareTo(key);
                int v = op.apply(comparison);
                opSatisfied = (v == 0);
                exactMatch = (comparison == 0);
                if (v < 0) {
                    Tracing.trace("Partition index lookup allows skipping sstable {}", descriptor.generation);
                    return null;
                }
            }
            if (opSatisfied) {
                // read data position from index entry
                RowIndexEntry indexEntry = rowIndexEntrySerializer.deserialize(in, in.getFilePointer());
                if (exactMatch && updateCacheAndStats) {
                    // key can be == to the index key only if it's a true row key
                    assert key instanceof DecoratedKey;
                    DecoratedKey decoratedKey = (DecoratedKey) key;
                    if (logger.isTraceEnabled()) {
                        // expensive sanity check! see CASSANDRA-4687
                        try (FileDataInput fdi = dfile.createReader(indexEntry.position)) {
                            DecoratedKey keyInDisk = decorateKey(ByteBufferUtil.readWithShortLength(fdi));
                            if (!keyInDisk.equals(key))
                                throw new AssertionError(String.format("%s != %s in %s", keyInDisk, key, fdi.getPath()));
                        }
                    }
                    // store exact match for the key
                    cacheKey(decoratedKey, indexEntry);
                }
                if (op == Operator.EQ && updateCacheAndStats)
                    bloomFilterTracker.addTruePositive();
                Tracing.trace("Partition index with {} entries found for sstable {}", indexEntry.columnsIndexCount(), descriptor.generation);
                return indexEntry;
            }
            RowIndexEntry.Serializer.skip(in, descriptor.version);
        }
    } catch (IOException e) {
        markSuspect();
        throw new CorruptSSTableException(e, path);
    }
    if (op == SSTableReader.Operator.EQ && updateCacheAndStats)
        bloomFilterTracker.addFalsePositive();
    Tracing.trace("Partition index lookup complete (bloom filter false positive) for sstable {}", descriptor.generation);
    return null;
}
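The scan loop above leans on the contract of Operator.apply: given the result of comparing an index key against the search key, it returns 0 when the operator is satisfied, a negative value when no later key can possibly match (the index is scanned in ascending key order), and a positive value when a later key still might. A self-contained restatement of that contract, assuming only the semantics visible in the loop (the enum below is illustrative, not Cassandra's actual Operator class):

// comparison = indexKey.compareTo(searchKey), with keys visited in ascending order.
enum Op {
    EQ, GE, GT;

    /** @return 0 if satisfied, < 0 if no later key can match, > 0 to keep scanning. */
    int apply(int comparison) {
        switch (this) {
            case EQ: return -comparison;             // match only on equality; overshoot means give up
            case GE: return comparison >= 0 ? 0 : 1; // first key >= target satisfies
            case GT: return comparison > 0 ? 0 : 1;  // first key > target satisfies
            default: throw new AssertionError();
        }
    }
}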
Use of org.apache.cassandra.io.util.FileDataInput in project cassandra by apache.
The class SegmentReaderTest, method compressedSegmenter.
private void compressedSegmenter(ICompressor compressor) throws IOException {
    int rawSize = (1 << 15) - 137;
    ByteBuffer plainTextBuffer = compressor.preferredBufferType().allocate(rawSize);
    byte[] b = new byte[rawSize];
    random.nextBytes(b);
    plainTextBuffer.put(b);
    plainTextBuffer.flip();
    // we need to prepend the plain-text size to the block we write out
    int uncompressedHeaderSize = 4;
    int length = compressor.initialCompressedBufferLength(rawSize);
    ByteBuffer compBuffer = ByteBufferUtil.ensureCapacity(null, length + uncompressedHeaderSize, true, compressor.preferredBufferType());
    compBuffer.putInt(rawSize);
    compressor.compress(plainTextBuffer, compBuffer);
    compBuffer.flip();
    File compressedFile = File.createTempFile("compressed-segment-", ".log");
    compressedFile.deleteOnExit();
    try (FileOutputStream fos = new FileOutputStream(compressedFile)) {
        fos.getChannel().write(compBuffer);
    }
    try (RandomAccessReader reader = RandomAccessReader.open(compressedFile)) {
        CompressedSegmenter segmenter = new CompressedSegmenter(compressor, reader);
        int fileLength = (int) compressedFile.length();
        SyncSegment syncSegment = segmenter.nextSegment(0, fileLength);
        FileDataInput fileDataInput = syncSegment.input;
        ByteBuffer fileBuffer = readBytes(fileDataInput, rawSize);
        plainTextBuffer.flip();
        Assert.assertEquals(plainTextBuffer, fileBuffer);
        // CompressedSegmenter includes the sync header length in syncSegment.endPosition
        Assert.assertEquals(rawSize, syncSegment.endPosition - CommitLogSegment.SYNC_MARKER_SIZE);
    }
}
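The on-disk framing this test exercises is minimal: a 4-byte uncompressed-length header followed by the compressed payload. The following standalone sketch shows the same write/read round trip using the JDK's Deflater/Inflater as a stand-in for Cassandra's ICompressor (the frame/unframe helper names are hypothetical, and the single deflate/inflate calls assume the buffers sized here suffice):

import java.nio.ByteBuffer;
import java.util.zip.DataFormatException;
import java.util.zip.Deflater;
import java.util.zip.Inflater;

// frame = [int uncompressedLength][compressed bytes]
static ByteBuffer frame(byte[] raw) {
    Deflater deflater = new Deflater();
    deflater.setInput(raw);
    deflater.finish();
    byte[] out = new byte[raw.length + 64]; // deflate output is at most slightly larger than the input
    int compressedLength = deflater.deflate(out);
    deflater.end();
    ByteBuffer buf = ByteBuffer.allocate(4 + compressedLength);
    buf.putInt(raw.length);                 // the uncompressed-size header, as in the test
    buf.put(out, 0, compressedLength);
    buf.flip();
    return buf;
}

static byte[] unframe(ByteBuffer buf) throws DataFormatException {
    int rawSize = buf.getInt();             // read the header first
    byte[] compressed = new byte[buf.remaining()];
    buf.get(compressed);
    Inflater inflater = new Inflater();
    inflater.setInput(compressed);
    byte[] raw = new byte[rawSize];
    int n = inflater.inflate(raw);          // fills raw completely for a well-formed frame
    inflater.end();
    assert n == rawSize;
    return raw;
}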
Use of org.apache.cassandra.io.util.FileDataInput in project cassandra by apache.
The class SSTableFlushObserverTest, method testFlushObserver.
@Test
public void testFlushObserver() {
    TableMetadata cfm = TableMetadata.builder(KS_NAME, CF_NAME)
                                     .addPartitionKeyColumn("id", UTF8Type.instance)
                                     .addRegularColumn("first_name", UTF8Type.instance)
                                     .addRegularColumn("age", Int32Type.instance)
                                     .addRegularColumn("height", LongType.instance)
                                     .build();
    LifecycleTransaction transaction = LifecycleTransaction.offline(OperationType.COMPACTION);
    FlushObserver observer = new FlushObserver();
    String sstableDirectory = DatabaseDescriptor.getAllDataFileLocations()[0];
    // build <data dir>/<keyspace>/<table>
    File directory = new File(sstableDirectory + File.separator + KS_NAME + File.separator + CF_NAME);
    directory.deleteOnExit();
    if (!directory.exists() && !directory.mkdirs())
        throw new FSWriteError(new IOException("failed to create tmp directory"), directory.getAbsolutePath());
    SSTableFormat.Type sstableFormat = SSTableFormat.Type.current();
    BigTableWriter writer = new BigTableWriter(new Descriptor(sstableFormat.info.getLatestVersion(), directory, KS_NAME, CF_NAME, 0, sstableFormat),
                                               10L, 0L, null,
                                               TableMetadataRef.forOfflineTools(cfm),
                                               new MetadataCollector(cfm.comparator).sstableLevel(0),
                                               new SerializationHeader(true, cfm, cfm.regularAndStaticColumns(), EncodingStats.NO_STATS),
                                               Collections.singletonList(observer),
                                               transaction);
    SSTableReader reader = null;
    Multimap<ByteBuffer, Cell> expected = ArrayListMultimap.create();
    try {
        final long now = System.currentTimeMillis();
        ByteBuffer key = UTF8Type.instance.fromString("key1");
        expected.putAll(key, Arrays.asList(BufferCell.live(getColumn(cfm, "age"), now, Int32Type.instance.decompose(27)),
                                           BufferCell.live(getColumn(cfm, "first_name"), now, UTF8Type.instance.fromString("jack")),
                                           BufferCell.live(getColumn(cfm, "height"), now, LongType.instance.decompose(183L))));
        writer.append(new RowIterator(cfm, key.duplicate(), Collections.singletonList(buildRow(expected.get(key)))));
        key = UTF8Type.instance.fromString("key2");
        expected.putAll(key, Arrays.asList(BufferCell.live(getColumn(cfm, "age"), now, Int32Type.instance.decompose(30)),
                                           BufferCell.live(getColumn(cfm, "first_name"), now, UTF8Type.instance.fromString("jim")),
                                           BufferCell.live(getColumn(cfm, "height"), now, LongType.instance.decompose(180L))));
        writer.append(new RowIterator(cfm, key, Collections.singletonList(buildRow(expected.get(key)))));
        key = UTF8Type.instance.fromString("key3");
        expected.putAll(key, Arrays.asList(BufferCell.live(getColumn(cfm, "age"), now, Int32Type.instance.decompose(30)),
                                           BufferCell.live(getColumn(cfm, "first_name"), now, UTF8Type.instance.fromString("ken")),
                                           BufferCell.live(getColumn(cfm, "height"), now, LongType.instance.decompose(178L))));
        writer.append(new RowIterator(cfm, key, Collections.singletonList(buildRow(expected.get(key)))));
        reader = writer.finish(true);
    } finally {
        FileUtils.closeQuietly(writer);
    }
    Assert.assertTrue(observer.isComplete);
    Assert.assertEquals(expected.size(), observer.rows.size());
    for (Pair<ByteBuffer, Long> e : observer.rows.keySet()) {
        ByteBuffer key = e.left;
        Long indexPosition = e.right;
        try (FileDataInput index = reader.ifile.createReader(indexPosition)) {
            ByteBuffer indexKey = ByteBufferUtil.readWithShortLength(index);
            Assert.assertEquals(0, UTF8Type.instance.compare(key, indexKey));
        } catch (IOException ex) {
            throw new FSReadError(ex, reader.getIndexFilename());
        }
        Assert.assertEquals(expected.get(key), observer.rows.get(e));
    }
}
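The index entries the test reads back are length-prefixed: each starts with the partition key serialized as an unsigned 16-bit length followed by the key bytes, which is what ByteBufferUtil.readWithShortLength decodes. A minimal sketch of that decoding against a plain DataInput (illustrative, not Cassandra's implementation):

import java.io.DataInput;
import java.io.IOException;
import java.nio.ByteBuffer;

// Decode a short-length-prefixed blob, the way the index component stores keys.
static ByteBuffer readWithShortLength(DataInput in) throws IOException {
    int length = in.readUnsignedShort(); // 2-byte unsigned length prefix (keys are capped at 64 KiB)
    byte[] bytes = new byte[length];
    in.readFully(bytes);
    return ByteBuffer.wrap(bytes);
}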
Use of org.apache.cassandra.io.util.FileDataInput in project cassandra by apache.
The class SSTableReaderTest, method testSpannedIndexPositions.
@Test
public void testSpannedIndexPositions() throws IOException {
    int originalMaxSegmentSize = MmappedRegions.MAX_SEGMENT_SIZE;
    // each index entry is ~11 bytes, so this will generate lots of segments
    MmappedRegions.MAX_SEGMENT_SIZE = 40;
    try {
        Keyspace keyspace = Keyspace.open(KEYSPACE1);
        ColumnFamilyStore store = keyspace.getColumnFamilyStore("Standard1");
        partitioner = store.getPartitioner();
        // insert a bunch of data and compact to a single sstable
        CompactionManager.instance.disableAutoCompaction();
        for (int j = 0; j < 100; j += 2) {
            new RowUpdateBuilder(store.metadata(), j, String.valueOf(j)).clustering("0").add("val", ByteBufferUtil.EMPTY_BYTE_BUFFER).build().applyUnsafe();
        }
        store.forceBlockingFlush();
        CompactionManager.instance.performMaximal(store, false);
        // check that all our keys are found correctly
        SSTableReader sstable = store.getLiveSSTables().iterator().next();
        for (int j = 0; j < 100; j += 2) {
            DecoratedKey dk = Util.dk(String.valueOf(j));
            try (FileDataInput file = sstable.getFileDataInput(sstable.getPosition(dk, SSTableReader.Operator.EQ).position)) {
                DecoratedKey keyInDisk = sstable.decorateKey(ByteBufferUtil.readWithShortLength(file));
                assert keyInDisk.equals(dk) : String.format("%s != %s in %s", keyInDisk, dk, file.getPath());
            }
        }
        // check no false positives (only even keys were inserted)
        for (int j = 1; j < 110; j += 2) {
            DecoratedKey dk = Util.dk(String.valueOf(j));
            assert sstable.getPosition(dk, SSTableReader.Operator.EQ) == null;
        }
    } finally {
        MmappedRegions.MAX_SEGMENT_SIZE = originalMaxSegmentSize;
    }
}