Use of org.apache.kafka.snapshot.RawSnapshotReader in project kafka by apache.
From the class MockLogTest, method testCreateSnapshot.
@Test
public void testCreateSnapshot() throws IOException {
    int numberOfRecords = 10;
    int epoch = 0;
    OffsetAndEpoch snapshotId = new OffsetAndEpoch(numberOfRecords, epoch);

    appendBatch(numberOfRecords, epoch);
    log.updateHighWatermark(new LogOffsetMetadata(numberOfRecords));

    try (RawSnapshotWriter snapshot = log.createNewSnapshot(snapshotId).get()) {
        snapshot.freeze();
    }

    // The snapshot was frozen without any records appended, so it is empty
    RawSnapshotReader snapshot = log.readSnapshot(snapshotId).get();
    assertEquals(0, snapshot.sizeInBytes());
}
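
Every usage on this page touches the reader through the same small surface. A minimal sketch of that surface, reconstructed from the calls in these snippets rather than copied from the Kafka sources:

// Minimal sketch of the reader surface these snippets exercise; the
// method set is taken from the calls on this page, not from the full
// Kafka interface, and the comments are our own glosses.
public interface RawSnapshotReader {
    // End offset and epoch identifying the snapshot
    OffsetAndEpoch snapshotId();

    // Total number of bytes in the snapshot
    long sizeInBytes();

    // Read a byte range of the snapshot; the result is not necessarily
    // offset-aligned, hence UnalignedRecords rather than Records
    UnalignedRecords slice(long position, int size);
}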
Use of org.apache.kafka.snapshot.RawSnapshotReader in project kafka by apache.
From the class LocalLogManager, method scheduleLogCheck.
private void scheduleLogCheck() {
    eventQueue.append(() -> {
        try {
            log.debug("Node {}: running log check.", nodeId);
            int numEntriesFound = 0;
            for (MetaLogListenerData listenerData : listeners.values()) {
                while (true) {
                    // Load the snapshot if there is a newer one and this node is not the leader
                    LeaderAndEpoch notifiedLeader = listenerData.notifiedLeader();
                    if (!OptionalInt.of(nodeId).equals(notifiedLeader.leaderId())) {
                        Optional<RawSnapshotReader> snapshot = shared.nextSnapshot(listenerData.offset());
                        if (snapshot.isPresent()) {
                            log.trace("Node {}: handling snapshot with id {}.", nodeId, snapshot.get().snapshotId());
                            listenerData.handleSnapshot(RecordsSnapshotReader.of(
                                snapshot.get(),
                                new MetadataRecordSerde(),
                                BufferSupplier.create(),
                                Integer.MAX_VALUE));
                        }
                    }
                    Entry<Long, LocalBatch> entry = shared.nextBatch(listenerData.offset());
                    if (entry == null) {
                        log.trace("Node {}: reached the end of the log after finding {} entries.", nodeId, numEntriesFound);
                        break;
                    }
                    long entryOffset = entry.getKey();
                    if (entryOffset > maxReadOffset) {
                        log.trace("Node {}: after {} entries, not reading the next entry because its offset is {}, and maxReadOffset is {}.", nodeId, numEntriesFound, entryOffset, maxReadOffset);
                        break;
                    }
                    if (entry.getValue() instanceof LeaderChangeBatch) {
                        LeaderChangeBatch batch = (LeaderChangeBatch) entry.getValue();
                        log.trace("Node {}: handling LeaderChange to {}.", nodeId, batch.newLeader);
                        // Only notify the listener if the batch matches the shared leader state
                        LeaderAndEpoch sharedLeader = shared.leaderAndEpoch();
                        if (batch.newLeader.equals(sharedLeader)) {
                            listenerData.handleLeaderChange(entryOffset, batch.newLeader);
                            if (batch.newLeader.epoch() > leader.epoch()) {
                                leader = batch.newLeader;
                            }
                        } else {
                            log.debug("Node {}: Ignoring {} since it doesn't match the latest known leader {}", nodeId, batch.newLeader, sharedLeader);
                            listenerData.setOffset(entryOffset);
                        }
                    } else if (entry.getValue() instanceof LocalRecordBatch) {
                        LocalRecordBatch batch = (LocalRecordBatch) entry.getValue();
                        log.trace("Node {}: handling LocalRecordBatch with offset {}.", nodeId, entryOffset);
                        ObjectSerializationCache objectCache = new ObjectSerializationCache();
                        listenerData.handleCommit(MemoryBatchReader.of(
                            Collections.singletonList(Batch.data(
                                entryOffset - batch.records.size() + 1,
                                batch.leaderEpoch,
                                batch.appendTimestamp,
                                batch.records.stream().mapToInt(record -> messageSize(record, objectCache)).sum(),
                                batch.records)),
                            reader -> { }));
                    }
                    numEntriesFound++;
                }
            }
            log.trace("Completed log check for node " + nodeId);
        } catch (Exception e) {
            log.error("Exception while handling log check", e);
        }
    });
}
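
The scheduleLogCheck body wraps the raw byte-oriented reader in a typed SnapshotReader via RecordsSnapshotReader.of. As a hedged illustration of what a listener might then do with it (applyRecord is an assumed helper, not LocalLogManager code):

// Illustrative consumer for the typed reader produced above; applyRecord
// is a hypothetical helper, the rest mirrors the call in scheduleLogCheck.
private void consumeSnapshot(RawSnapshotReader rawSnapshot) {
    try (SnapshotReader<ApiMessageAndVersion> reader = RecordsSnapshotReader.of(
            rawSnapshot, new MetadataRecordSerde(), BufferSupplier.create(), Integer.MAX_VALUE)) {
        while (reader.hasNext()) {
            Batch<ApiMessageAndVersion> batch = reader.next();
            for (ApiMessageAndVersion message : batch.records()) {
                applyRecord(message); // hypothetical: apply each metadata record to local state
            }
        }
    }
}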
Use of org.apache.kafka.snapshot.RawSnapshotReader in project kafka by apache.
From the class KafkaRaftClient, method handleFetchSnapshotRequest.
/**
 * Handle a FetchSnapshot request. It is similar to the Fetch request, but the response uses
 * {@link UnalignedRecords} because the returned bytes are not necessarily offset-aligned.
 *
 * This API may return the following errors:
 *
 * - {@link Errors#INCONSISTENT_CLUSTER_ID} if the cluster id is present in the request
 *   but does not match this node's cluster id
 * - {@link Errors#BROKER_NOT_AVAILABLE} if this node is currently shutting down
 * - {@link Errors#FENCED_LEADER_EPOCH} if the request epoch is smaller than this node's epoch
 * - {@link Errors#INVALID_REQUEST} if the request epoch is larger than the leader's current epoch,
 *   or if either the fetch offset or the last fetched epoch is invalid
 * - {@link Errors#SNAPSHOT_NOT_FOUND} if the requested snapshot id does not exist
 * - {@link Errors#POSITION_OUT_OF_RANGE} if the requested snapshot position is out of range
 */
private FetchSnapshotResponseData handleFetchSnapshotRequest(RaftRequest.Inbound requestMetadata) {
    FetchSnapshotRequestData data = (FetchSnapshotRequestData) requestMetadata.data;

    if (!hasValidClusterId(data.clusterId())) {
        return new FetchSnapshotResponseData().setErrorCode(Errors.INCONSISTENT_CLUSTER_ID.code());
    }

    // Reject any request that does not contain exactly one topic with exactly one partition
    if (data.topics().size() != 1 || data.topics().get(0).partitions().size() != 1) {
        return FetchSnapshotResponse.withTopLevelError(Errors.INVALID_REQUEST);
    }

    Optional<FetchSnapshotRequestData.PartitionSnapshot> partitionSnapshotOpt =
        FetchSnapshotRequest.forTopicPartition(data, log.topicPartition());
    if (!partitionSnapshotOpt.isPresent()) {
        // The Raft client assumes that there is only one topic partition.
        TopicPartition unknownTopicPartition = new TopicPartition(
            data.topics().get(0).name(),
            data.topics().get(0).partitions().get(0).partition());
        return FetchSnapshotResponse.singleton(
            unknownTopicPartition,
            responsePartitionSnapshot -> responsePartitionSnapshot.setErrorCode(Errors.UNKNOWN_TOPIC_OR_PARTITION.code()));
    }

    FetchSnapshotRequestData.PartitionSnapshot partitionSnapshot = partitionSnapshotOpt.get();
    Optional<Errors> leaderValidation = validateLeaderOnlyRequest(partitionSnapshot.currentLeaderEpoch());
    if (leaderValidation.isPresent()) {
        return FetchSnapshotResponse.singleton(
            log.topicPartition(),
            responsePartitionSnapshot -> addQuorumLeader(responsePartitionSnapshot).setErrorCode(leaderValidation.get().code()));
    }

    OffsetAndEpoch snapshotId = new OffsetAndEpoch(
        partitionSnapshot.snapshotId().endOffset(),
        partitionSnapshot.snapshotId().epoch());
    Optional<RawSnapshotReader> snapshotOpt = log.readSnapshot(snapshotId);
    if (!snapshotOpt.isPresent()) {
        return FetchSnapshotResponse.singleton(
            log.topicPartition(),
            responsePartitionSnapshot -> addQuorumLeader(responsePartitionSnapshot).setErrorCode(Errors.SNAPSHOT_NOT_FOUND.code()));
    }

    RawSnapshotReader snapshot = snapshotOpt.get();
    long snapshotSize = snapshot.sizeInBytes();
    if (partitionSnapshot.position() < 0 || partitionSnapshot.position() >= snapshotSize) {
        return FetchSnapshotResponse.singleton(
            log.topicPartition(),
            responsePartitionSnapshot -> addQuorumLeader(responsePartitionSnapshot).setErrorCode(Errors.POSITION_OUT_OF_RANGE.code()));
    }
    if (partitionSnapshot.position() > Integer.MAX_VALUE) {
        throw new IllegalStateException(String.format(
            "Trying to fetch a snapshot with size (%s) and a position (%s) larger than %s",
            snapshotSize, partitionSnapshot.position(), Integer.MAX_VALUE));
    }

    // Clamp the slice size to an int; a snapshot larger than Integer.MAX_VALUE
    // is served in chunks of at most Integer.MAX_VALUE bytes
    int maxSnapshotSize;
    try {
        maxSnapshotSize = Math.toIntExact(snapshotSize);
    } catch (ArithmeticException e) {
        maxSnapshotSize = Integer.MAX_VALUE;
    }

    UnalignedRecords records = snapshot.slice(partitionSnapshot.position(), Math.min(data.maxBytes(), maxSnapshotSize));
    return FetchSnapshotResponse.singleton(log.topicPartition(), responsePartitionSnapshot -> {
        addQuorumLeader(responsePartitionSnapshot)
            .snapshotId()
            .setEndOffset(snapshotId.offset)
            .setEpoch(snapshotId.epoch);
        return responsePartitionSnapshot
            .setSize(snapshotSize)
            .setPosition(partitionSnapshot.position())
            .setUnalignedRecords(records);
    });
}
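
For illustration, a follower receiving the per-partition error codes documented above might dispatch on a subset of them roughly as follows; every handler method here is a hypothetical placeholder, not KafkaRaftClient API:

// Hypothetical follower-side dispatch over the documented error codes;
// all helper methods below are placeholders, not Kafka code.
private void handleFetchSnapshotPartitionError(FetchSnapshotResponseData.PartitionSnapshot partitionSnapshot) {
    Errors error = Errors.forCode(partitionSnapshot.errorCode());
    switch (error) {
        case NONE:
            appendReceivedChunk(partitionSnapshot); // accumulate the fetched bytes
            break;
        case SNAPSHOT_NOT_FOUND:
        case POSITION_OUT_OF_RANGE:
            restartSnapshotFetch();                 // snapshot id or position is stale; start over
            break;
        case FENCED_LEADER_EPOCH:
            refreshLeaderAndEpoch();                // our epoch is behind; update before retrying
            break;
        default:
            failWithUnexpectedError(error);
    }
}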
Use of org.apache.kafka.snapshot.RawSnapshotReader in project kafka by apache.
From the class KafkaRaftClientSnapshotTest, method testFetchSnapshotResponsePartialData.
@Test
public void testFetchSnapshotResponsePartialData() throws Exception {
    int localId = 0;
    int leaderId = localId + 1;
    Set<Integer> voters = Utils.mkSet(localId, leaderId);
    int epoch = 2;
    OffsetAndEpoch snapshotId = new OffsetAndEpoch(100L, 1);

    RaftClientTestContext context = new RaftClientTestContext.Builder(localId, voters)
        .withElectedLeader(epoch, leaderId)
        .build();

    context.pollUntilRequest();
    RaftRequest.Outbound fetchRequest = context.assertSentFetchRequest();
    context.assertFetchRequestData(fetchRequest, epoch, 0L, 0);
    context.deliverResponse(
        fetchRequest.correlationId,
        fetchRequest.destinationId(),
        snapshotFetchResponse(context.metadataPartition, context.metadataTopicId, epoch, leaderId, snapshotId, 200L));

    context.pollUntilRequest();
    RaftRequest.Outbound snapshotRequest = context.assertSentFetchSnapshotRequest();
    FetchSnapshotRequestData.PartitionSnapshot request =
        assertFetchSnapshotRequest(snapshotRequest, context.metadataPartition, localId, Integer.MAX_VALUE).get();
    assertEquals(snapshotId.offset, request.snapshotId().endOffset());
    assertEquals(snapshotId.epoch, request.snapshotId().epoch());
    assertEquals(0, request.position());

    List<String> records = Arrays.asList("foo", "bar");
    MemorySnapshotWriter memorySnapshot = new MemorySnapshotWriter(snapshotId);
    try (SnapshotWriter<String> snapshotWriter = snapshotWriter(context, memorySnapshot)) {
        snapshotWriter.append(records);
        snapshotWriter.freeze();
    }

    // Deliver only the first half of the snapshot
    ByteBuffer sendingBuffer = memorySnapshot.buffer().slice();
    sendingBuffer.limit(sendingBuffer.limit() / 2);
    context.deliverResponse(
        snapshotRequest.correlationId,
        snapshotRequest.destinationId(),
        fetchSnapshotResponse(context.metadataPartition, epoch, leaderId, snapshotId, memorySnapshot.buffer().remaining(), 0L, sendingBuffer));

    // The next request should resume exactly where the first chunk ended
    context.pollUntilRequest();
    snapshotRequest = context.assertSentFetchSnapshotRequest();
    request = assertFetchSnapshotRequest(snapshotRequest, context.metadataPartition, localId, Integer.MAX_VALUE).get();
    assertEquals(snapshotId.offset, request.snapshotId().endOffset());
    assertEquals(snapshotId.epoch, request.snapshotId().epoch());
    assertEquals(sendingBuffer.limit(), request.position());

    // Deliver the remaining half
    sendingBuffer = memorySnapshot.buffer().slice();
    sendingBuffer.position(Math.toIntExact(request.position()));
    context.deliverResponse(
        snapshotRequest.correlationId,
        snapshotRequest.destinationId(),
        fetchSnapshotResponse(context.metadataPartition, epoch, leaderId, snapshotId, memorySnapshot.buffer().remaining(), request.position(), sendingBuffer));

    context.pollUntilRequest();
    fetchRequest = context.assertSentFetchRequest();
    context.assertFetchRequestData(fetchRequest, epoch, snapshotId.offset, snapshotId.epoch);

    // Check that the snapshot was written to the log
    RawSnapshotReader snapshot = context.log.readSnapshot(snapshotId).get();
    assertEquals(memorySnapshot.buffer().remaining(), snapshot.sizeInBytes());
    SnapshotWriterReaderTest.assertSnapshot(Arrays.asList(records), snapshot);

    // Check that the listener was notified of the new snapshot
    try (SnapshotReader<String> reader = context.listener.drainHandledSnapshot().get()) {
        assertEquals(snapshotId, reader.snapshotId());
        SnapshotWriterReaderTest.assertSnapshot(Arrays.asList(records), reader);
    }
}
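
The invariant this test pins down is the resumption rule: the next FetchSnapshot position equals the previous position plus the bytes already received. A minimal sketch of that bookkeeping, assuming hypothetical pendingSnapshot and sendFetchSnapshotRequest helpers:

// Hedged sketch of the resumption rule asserted above; the accumulator
// and request helper are hypothetical, not Kafka API.
private void onSnapshotChunk(FetchSnapshotResponseData.PartitionSnapshot partitionSnapshot) {
    int received = partitionSnapshot.unalignedRecords().sizeInBytes();
    pendingSnapshot.append(partitionSnapshot);  // hypothetical: buffer this chunk
    long nextPosition = partitionSnapshot.position() + received;
    if (nextPosition < partitionSnapshot.size()) {
        sendFetchSnapshotRequest(nextPosition); // hypothetical: ask for the next chunk
    } else {
        pendingSnapshot.freeze();               // whole snapshot received; resume normal fetching
    }
}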
Use of org.apache.kafka.snapshot.RawSnapshotReader in project kafka by apache.
From the class KafkaRaftClientSnapshotTest, method testPartialFetchSnapshotRequestAsLeader.
@Test
public void testPartialFetchSnapshotRequestAsLeader() throws Exception {
    int localId = 0;
    Set<Integer> voters = Utils.mkSet(localId, localId + 1);
    OffsetAndEpoch snapshotId = new OffsetAndEpoch(2, 1);
    List<String> records = Arrays.asList("foo", "bar");

    RaftClientTestContext context = new RaftClientTestContext.Builder(localId, voters)
        .appendToLog(snapshotId.epoch, records)
        .build();

    context.becomeLeader();
    int epoch = context.currentEpoch();
    context.advanceLocalLeaderHighWatermarkToLogEndOffset();

    try (SnapshotWriter<String> snapshot = context.client.createSnapshot(snapshotId.offset - 1, snapshotId.epoch, 0).get()) {
        assertEquals(snapshotId, snapshot.snapshotId());
        snapshot.append(records);
        snapshot.freeze();
    }

    RawSnapshotReader snapshot = context.log.readSnapshot(snapshotId).get();

    // Fetch half of the snapshot
    context.deliverRequest(fetchSnapshotRequest(context.metadataPartition, epoch, snapshotId, Math.toIntExact(snapshot.sizeInBytes() / 2), 0));
    context.client.poll();
    FetchSnapshotResponseData.PartitionSnapshot response = context.assertSentFetchSnapshotResponse(context.metadataPartition).get();
    assertEquals(Errors.NONE, Errors.forCode(response.errorCode()));
    assertEquals(snapshot.sizeInBytes(), response.size());
    assertEquals(0, response.position());
    assertEquals(snapshot.sizeInBytes() / 2, response.unalignedRecords().sizeInBytes());

    UnalignedMemoryRecords memoryRecords = (UnalignedMemoryRecords) snapshot.slice(0, Math.toIntExact(snapshot.sizeInBytes()));
    ByteBuffer snapshotBuffer = memoryRecords.buffer();
    ByteBuffer responseBuffer = ByteBuffer.allocate(Math.toIntExact(snapshot.sizeInBytes()));
    responseBuffer.put(((UnalignedMemoryRecords) response.unalignedRecords()).buffer());

    ByteBuffer expectedBytes = snapshotBuffer.duplicate();
    expectedBytes.limit(Math.toIntExact(snapshot.sizeInBytes() / 2));
    assertEquals(expectedBytes, responseBuffer.duplicate().flip());

    // Fetch the remainder of the snapshot
    context.deliverRequest(fetchSnapshotRequest(context.metadataPartition, epoch, snapshotId, Integer.MAX_VALUE, responseBuffer.position()));
    context.client.poll();
    response = context.assertSentFetchSnapshotResponse(context.metadataPartition).get();
    assertEquals(Errors.NONE, Errors.forCode(response.errorCode()));
    assertEquals(snapshot.sizeInBytes(), response.size());
    assertEquals(responseBuffer.position(), response.position());
    assertEquals(snapshot.sizeInBytes() - (snapshot.sizeInBytes() / 2), response.unalignedRecords().sizeInBytes());

    responseBuffer.put(((UnalignedMemoryRecords) response.unalignedRecords()).buffer());
    assertEquals(snapshotBuffer, responseBuffer.flip());
}
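
Taken together, the leader side of these tests is a simple chunking loop over slice(). A hedged sketch built only from the calls shown on this page; the send step is a hypothetical transport placeholder:

// Illustrative leader-side chunking over a RawSnapshotReader; only
// sizeInBytes() and slice() come from the snippets above, send() is
// a hypothetical transport call.
static void streamSnapshot(RawSnapshotReader snapshot, int maxBytes) {
    long size = snapshot.sizeInBytes();
    long position = 0;
    while (position < size) {
        int chunkSize = Math.toIntExact(Math.min((long) maxBytes, size - position));
        UnalignedRecords chunk = snapshot.slice(position, chunkSize);
        send(chunk); // hypothetical: ship this chunk to the follower
        position += chunk.sizeInBytes();
    }
}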