Example 1 with RawSnapshotReader

Use of org.apache.kafka.snapshot.RawSnapshotReader in project kafka by apache.

From class MockLogTest, method testCreateSnapshot.

@Test
public void testCreateSnapshot() throws IOException {
    int numberOfRecords = 10;
    int epoch = 0;
    OffsetAndEpoch snapshotId = new OffsetAndEpoch(numberOfRecords, epoch);
    appendBatch(numberOfRecords, epoch);
    log.updateHighWatermark(new LogOffsetMetadata(numberOfRecords));
    try (RawSnapshotWriter snapshot = log.createNewSnapshot(snapshotId).get()) {
        snapshot.freeze();
    }
    RawSnapshotReader snapshot = log.readSnapshot(snapshotId).get();
    assertEquals(0, snapshot.sizeInBytes());
}
Also used: RawSnapshotWriter(org.apache.kafka.snapshot.RawSnapshotWriter) RawSnapshotReader(org.apache.kafka.snapshot.RawSnapshotReader) Test(org.junit.jupiter.api.Test)
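
As a side note, readSnapshot returns an Optional<RawSnapshotReader>, so a guarded lookup avoids a NoSuchElementException when no snapshot exists at the given id. A minimal sketch against the same MockLog fixture (assuming java.util.Optional and JUnit 5 assertions are imported):

Optional<RawSnapshotReader> maybeSnapshot = log.readSnapshot(snapshotId);
assertTrue(maybeSnapshot.isPresent(), "expected a snapshot at " + snapshotId);
assertEquals(0, maybeSnapshot.get().sizeInBytes());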

Example 2 with RawSnapshotReader

Use of org.apache.kafka.snapshot.RawSnapshotReader in project kafka by apache.

From class LocalLogManager, method scheduleLogCheck.

private void scheduleLogCheck() {
    eventQueue.append(() -> {
        try {
            log.debug("Node {}: running log check.", nodeId);
            int numEntriesFound = 0;
            for (MetaLogListenerData listenerData : listeners.values()) {
                while (true) {
                    // Load the next available snapshot when this node is not the leader
                    LeaderAndEpoch notifiedLeader = listenerData.notifiedLeader();
                    if (!OptionalInt.of(nodeId).equals(notifiedLeader.leaderId())) {
                        Optional<RawSnapshotReader> snapshot = shared.nextSnapshot(listenerData.offset());
                        if (snapshot.isPresent()) {
                            log.trace("Node {}: handling snapshot with id {}.", nodeId, snapshot.get().snapshotId());
                            listenerData.handleSnapshot(RecordsSnapshotReader.of(snapshot.get(), new MetadataRecordSerde(), BufferSupplier.create(), Integer.MAX_VALUE));
                        }
                    }
                    Entry<Long, LocalBatch> entry = shared.nextBatch(listenerData.offset());
                    if (entry == null) {
                        log.trace("Node {}: reached the end of the log after finding " + "{} entries.", nodeId, numEntriesFound);
                        break;
                    }
                    long entryOffset = entry.getKey();
                    if (entryOffset > maxReadOffset) {
                        log.trace("Node {}: after {} entries, not reading the next " + "entry because its offset is {}, and maxReadOffset is {}.", nodeId, numEntriesFound, entryOffset, maxReadOffset);
                        break;
                    }
                    if (entry.getValue() instanceof LeaderChangeBatch) {
                        LeaderChangeBatch batch = (LeaderChangeBatch) entry.getValue();
                        log.trace("Node {}: handling LeaderChange to {}.", nodeId, batch.newLeader);
                        // Only notify the listener if it equals the shared leader state
                        LeaderAndEpoch sharedLeader = shared.leaderAndEpoch();
                        if (batch.newLeader.equals(sharedLeader)) {
                            listenerData.handleLeaderChange(entryOffset, batch.newLeader);
                            if (batch.newLeader.epoch() > leader.epoch()) {
                                leader = batch.newLeader;
                            }
                        } else {
                            log.debug("Node {}: Ignoring {} since it doesn't match the latest known leader {}", nodeId, batch.newLeader, sharedLeader);
                            listenerData.setOffset(entryOffset);
                        }
                    } else if (entry.getValue() instanceof LocalRecordBatch) {
                        LocalRecordBatch batch = (LocalRecordBatch) entry.getValue();
                        log.trace("Node {}: handling LocalRecordBatch with offset {}.", nodeId, entryOffset);
                        ObjectSerializationCache objectCache = new ObjectSerializationCache();
                        listenerData.handleCommit(MemoryBatchReader.of(
                            Collections.singletonList(Batch.data(
                                entryOffset - batch.records.size() + 1,
                                batch.leaderEpoch,
                                batch.appendTimestamp,
                                batch.records.stream().mapToInt(record -> messageSize(record, objectCache)).sum(),
                                batch.records)),
                            reader -> {
                            }));
                    }
                    numEntriesFound++;
                }
            }
            log.trace("Completed log check for node " + nodeId);
        } catch (Exception e) {
            log.error("Exception while handling log check", e);
        }
    });
}
Also used: MockRawSnapshotReader(org.apache.kafka.snapshot.MockRawSnapshotReader) RawSnapshotReader(org.apache.kafka.snapshot.RawSnapshotReader) ExecutionException(java.util.concurrent.ExecutionException) ObjectSerializationCache(org.apache.kafka.common.protocol.ObjectSerializationCache) MetadataRecordSerde(org.apache.kafka.metadata.MetadataRecordSerde) OptionalLong(java.util.OptionalLong) LeaderAndEpoch(org.apache.kafka.raft.LeaderAndEpoch)
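
One detail worth a worked example: Batch.data takes a base offset as its first argument, while entryOffset above is treated as the last offset in the batch, hence entryOffset - batch.records.size() + 1. With hypothetical values:

long entryOffset = 9;                             // last offset in the batch (hypothetical)
int recordCount = 3;                              // batch.records.size() (hypothetical)
long baseOffset = entryOffset - recordCount + 1;  // 7: the batch spans offsets 7..9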

Example 3 with RawSnapshotReader

Use of org.apache.kafka.snapshot.RawSnapshotReader in project kafka by apache.

From class KafkaRaftClient, method handleFetchSnapshotRequest.

/**
 * Handle a FetchSnapshot request, similar to a Fetch request but using {@link UnalignedRecords}
 * in the response because the records are not necessarily offset-aligned.
 *
 * This API may return the following errors:
 *
 * - {@link Errors#INCONSISTENT_CLUSTER_ID} if the cluster id is present in the request
 *     but does not match this node's cluster id
 * - {@link Errors#BROKER_NOT_AVAILABLE} if this node is currently shutting down
 * - {@link Errors#FENCED_LEADER_EPOCH} if the epoch is smaller than this node's epoch
 * - {@link Errors#INVALID_REQUEST} if the request epoch is larger than the leader's current epoch
 *     or if either the fetch offset or the last fetched epoch is invalid
 * - {@link Errors#SNAPSHOT_NOT_FOUND} if the requested snapshot id does not exist
 * - {@link Errors#POSITION_OUT_OF_RANGE} if the requested snapshot position is out of range
 */
private FetchSnapshotResponseData handleFetchSnapshotRequest(RaftRequest.Inbound requestMetadata) {
    FetchSnapshotRequestData data = (FetchSnapshotRequestData) requestMetadata.data;
    if (!hasValidClusterId(data.clusterId())) {
        return new FetchSnapshotResponseData().setErrorCode(Errors.INCONSISTENT_CLUSTER_ID.code());
    }
    // Expect exactly one topic with exactly one partition; anything else is an invalid request.
    if (data.topics().size() != 1 || data.topics().get(0).partitions().size() != 1) {
        return FetchSnapshotResponse.withTopLevelError(Errors.INVALID_REQUEST);
    }
    Optional<FetchSnapshotRequestData.PartitionSnapshot> partitionSnapshotOpt = FetchSnapshotRequest.forTopicPartition(data, log.topicPartition());
    if (!partitionSnapshotOpt.isPresent()) {
        // The Raft client assumes that there is only one topic partition.
        TopicPartition unknownTopicPartition = new TopicPartition(data.topics().get(0).name(), data.topics().get(0).partitions().get(0).partition());
        return FetchSnapshotResponse.singleton(unknownTopicPartition, responsePartitionSnapshot -> responsePartitionSnapshot.setErrorCode(Errors.UNKNOWN_TOPIC_OR_PARTITION.code()));
    }
    FetchSnapshotRequestData.PartitionSnapshot partitionSnapshot = partitionSnapshotOpt.get();
    Optional<Errors> leaderValidation = validateLeaderOnlyRequest(partitionSnapshot.currentLeaderEpoch());
    if (leaderValidation.isPresent()) {
        return FetchSnapshotResponse.singleton(log.topicPartition(), responsePartitionSnapshot -> addQuorumLeader(responsePartitionSnapshot).setErrorCode(leaderValidation.get().code()));
    }
    OffsetAndEpoch snapshotId = new OffsetAndEpoch(partitionSnapshot.snapshotId().endOffset(), partitionSnapshot.snapshotId().epoch());
    Optional<RawSnapshotReader> snapshotOpt = log.readSnapshot(snapshotId);
    if (!snapshotOpt.isPresent()) {
        return FetchSnapshotResponse.singleton(log.topicPartition(), responsePartitionSnapshot -> addQuorumLeader(responsePartitionSnapshot).setErrorCode(Errors.SNAPSHOT_NOT_FOUND.code()));
    }
    RawSnapshotReader snapshot = snapshotOpt.get();
    long snapshotSize = snapshot.sizeInBytes();
    if (partitionSnapshot.position() < 0 || partitionSnapshot.position() >= snapshotSize) {
        return FetchSnapshotResponse.singleton(log.topicPartition(), responsePartitionSnapshot -> addQuorumLeader(responsePartitionSnapshot).setErrorCode(Errors.POSITION_OUT_OF_RANGE.code()));
    }
    if (partitionSnapshot.position() > Integer.MAX_VALUE) {
        throw new IllegalStateException(String.format("Trying to fetch a snapshot with size (%s) and a position (%s) larger than %s", snapshotSize, partitionSnapshot.position(), Integer.MAX_VALUE));
    }
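    // slice() takes an int size, so clamp the (long) snapshot size to
    // Integer.MAX_VALUE when it does not fit in an int.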
    int maxSnapshotSize;
    try {
        maxSnapshotSize = Math.toIntExact(snapshotSize);
    } catch (ArithmeticException e) {
        maxSnapshotSize = Integer.MAX_VALUE;
    }
    UnalignedRecords records = snapshot.slice(partitionSnapshot.position(), Math.min(data.maxBytes(), maxSnapshotSize));
    return FetchSnapshotResponse.singleton(log.topicPartition(), responsePartitionSnapshot -> {
        addQuorumLeader(responsePartitionSnapshot).snapshotId().setEndOffset(snapshotId.offset).setEpoch(snapshotId.epoch);
        return responsePartitionSnapshot.setSize(snapshotSize).setPosition(partitionSnapshot.position()).setUnalignedRecords(records);
    });
}
Also used: FetchSnapshotResponseData(org.apache.kafka.common.message.FetchSnapshotResponseData) RawSnapshotReader(org.apache.kafka.snapshot.RawSnapshotReader) Errors(org.apache.kafka.common.protocol.Errors) TopicPartition(org.apache.kafka.common.TopicPartition) RaftUtil.hasValidTopicPartition(org.apache.kafka.raft.RaftUtil.hasValidTopicPartition) FetchSnapshotRequestData(org.apache.kafka.common.message.FetchSnapshotRequestData) UnalignedRecords(org.apache.kafka.common.record.UnalignedRecords)
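
A standalone sketch of that clamp-then-min chunk sizing, in plain Java with assumed values, mirroring the logic above without any Kafka types:

long snapshotSize = 5_000_000_000L;  // assumed: larger than Integer.MAX_VALUE
int maxBytes = 1_048_576;            // assumed: the client's requested fetch cap

int maxSnapshotSize;
try {
    maxSnapshotSize = Math.toIntExact(snapshotSize);  // throws if the size does not fit in an int
} catch (ArithmeticException e) {
    maxSnapshotSize = Integer.MAX_VALUE;              // clamp oversized snapshots
}
int sliceSize = Math.min(maxBytes, maxSnapshotSize);  // bytes to return in this chunk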

Example 4 with RawSnapshotReader

Use of org.apache.kafka.snapshot.RawSnapshotReader in project kafka by apache.

From class KafkaRaftClientSnapshotTest, method testFetchSnapshotResponsePartialData.

@Test
public void testFetchSnapshotResponsePartialData() throws Exception {
    int localId = 0;
    int leaderId = localId + 1;
    Set<Integer> voters = Utils.mkSet(localId, leaderId);
    int epoch = 2;
    OffsetAndEpoch snapshotId = new OffsetAndEpoch(100L, 1);
    RaftClientTestContext context = new RaftClientTestContext.Builder(localId, voters).withElectedLeader(epoch, leaderId).build();
    context.pollUntilRequest();
    RaftRequest.Outbound fetchRequest = context.assertSentFetchRequest();
    context.assertFetchRequestData(fetchRequest, epoch, 0L, 0);
    context.deliverResponse(fetchRequest.correlationId, fetchRequest.destinationId(), snapshotFetchResponse(context.metadataPartition, context.metadataTopicId, epoch, leaderId, snapshotId, 200L));
    context.pollUntilRequest();
    RaftRequest.Outbound snapshotRequest = context.assertSentFetchSnapshotRequest();
    FetchSnapshotRequestData.PartitionSnapshot request = assertFetchSnapshotRequest(snapshotRequest, context.metadataPartition, localId, Integer.MAX_VALUE).get();
    assertEquals(snapshotId.offset, request.snapshotId().endOffset());
    assertEquals(snapshotId.epoch, request.snapshotId().epoch());
    assertEquals(0, request.position());
    List<String> records = Arrays.asList("foo", "bar");
    MemorySnapshotWriter memorySnapshot = new MemorySnapshotWriter(snapshotId);
    try (SnapshotWriter<String> snapshotWriter = snapshotWriter(context, memorySnapshot)) {
        snapshotWriter.append(records);
        snapshotWriter.freeze();
    }
    ByteBuffer sendingBuffer = memorySnapshot.buffer().slice();
    sendingBuffer.limit(sendingBuffer.limit() / 2);
    context.deliverResponse(snapshotRequest.correlationId, snapshotRequest.destinationId(), fetchSnapshotResponse(context.metadataPartition, epoch, leaderId, snapshotId, memorySnapshot.buffer().remaining(), 0L, sendingBuffer));
    context.pollUntilRequest();
    snapshotRequest = context.assertSentFetchSnapshotRequest();
    request = assertFetchSnapshotRequest(snapshotRequest, context.metadataPartition, localId, Integer.MAX_VALUE).get();
    assertEquals(snapshotId.offset, request.snapshotId().endOffset());
    assertEquals(snapshotId.epoch, request.snapshotId().epoch());
    assertEquals(sendingBuffer.limit(), request.position());
    sendingBuffer = memorySnapshot.buffer().slice();
    sendingBuffer.position(Math.toIntExact(request.position()));
    context.deliverResponse(snapshotRequest.correlationId, snapshotRequest.destinationId(), fetchSnapshotResponse(context.metadataPartition, epoch, leaderId, snapshotId, memorySnapshot.buffer().remaining(), request.position(), sendingBuffer));
    context.pollUntilRequest();
    fetchRequest = context.assertSentFetchRequest();
    context.assertFetchRequestData(fetchRequest, epoch, snapshotId.offset, snapshotId.epoch);
    // Check that the snapshot was written to the log
    RawSnapshotReader snapshot = context.log.readSnapshot(snapshotId).get();
    assertEquals(memorySnapshot.buffer().remaining(), snapshot.sizeInBytes());
    SnapshotWriterReaderTest.assertSnapshot(Arrays.asList(records), snapshot);
    // Check that listener was notified of the new snapshot
    try (SnapshotReader<String> reader = context.listener.drainHandledSnapshot().get()) {
        assertEquals(snapshotId, reader.snapshotId());
        SnapshotWriterReaderTest.assertSnapshot(Arrays.asList(records), reader);
    }
}
Also used: RawSnapshotReader(org.apache.kafka.snapshot.RawSnapshotReader) ByteBuffer(java.nio.ByteBuffer) FetchSnapshotRequestData(org.apache.kafka.common.message.FetchSnapshotRequestData) SnapshotWriterReaderTest(org.apache.kafka.snapshot.SnapshotWriterReaderTest) Test(org.junit.jupiter.api.Test)
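
The partial-data handshake above leans on ByteBuffer slice/limit/position mechanics. A self-contained sketch of just those mechanics, with a hypothetical payload standing in for memorySnapshot.buffer():

import java.nio.ByteBuffer;
import java.nio.charset.StandardCharsets;

public class PartialTransferDemo {
    public static void main(String[] args) {
        ByteBuffer full = ByteBuffer.wrap("snapshot-bytes".getBytes(StandardCharsets.UTF_8));

        // First chunk: an independent slice, capped at half of the total.
        ByteBuffer first = full.slice();
        first.limit(first.limit() / 2);

        // Second chunk: a fresh slice, positioned where the first chunk ended.
        ByteBuffer second = full.slice();
        second.position(first.limit());

        // Together the two chunks cover the whole payload exactly once.
        System.out.println(first.remaining() + second.remaining() == full.remaining()); // true
    }
}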

Example 5 with RawSnapshotReader

Use of org.apache.kafka.snapshot.RawSnapshotReader in project kafka by apache.

From class KafkaRaftClientSnapshotTest, method testPartialFetchSnapshotRequestAsLeader.

@Test
public void testPartialFetchSnapshotRequestAsLeader() throws Exception {
    int localId = 0;
    Set<Integer> voters = Utils.mkSet(localId, localId + 1);
    OffsetAndEpoch snapshotId = new OffsetAndEpoch(2, 1);
    List<String> records = Arrays.asList("foo", "bar");
    RaftClientTestContext context = new RaftClientTestContext.Builder(localId, voters).appendToLog(snapshotId.epoch, records).build();
    context.becomeLeader();
    int epoch = context.currentEpoch();
    context.advanceLocalLeaderHighWatermarkToLogEndOffset();
    try (SnapshotWriter<String> snapshot = context.client.createSnapshot(snapshotId.offset - 1, snapshotId.epoch, 0).get()) {
        assertEquals(snapshotId, snapshot.snapshotId());
        snapshot.append(records);
        snapshot.freeze();
    }
    RawSnapshotReader snapshot = context.log.readSnapshot(snapshotId).get();
    // Fetch half of the snapshot
    context.deliverRequest(fetchSnapshotRequest(context.metadataPartition, epoch, snapshotId, Math.toIntExact(snapshot.sizeInBytes() / 2), 0));
    context.client.poll();
    FetchSnapshotResponseData.PartitionSnapshot response = context.assertSentFetchSnapshotResponse(context.metadataPartition).get();
    assertEquals(Errors.NONE, Errors.forCode(response.errorCode()));
    assertEquals(snapshot.sizeInBytes(), response.size());
    assertEquals(0, response.position());
    assertEquals(snapshot.sizeInBytes() / 2, response.unalignedRecords().sizeInBytes());
    UnalignedMemoryRecords memoryRecords = (UnalignedMemoryRecords) snapshot.slice(0, Math.toIntExact(snapshot.sizeInBytes()));
    ByteBuffer snapshotBuffer = memoryRecords.buffer();
    ByteBuffer responseBuffer = ByteBuffer.allocate(Math.toIntExact(snapshot.sizeInBytes()));
    responseBuffer.put(((UnalignedMemoryRecords) response.unalignedRecords()).buffer());
    ByteBuffer expectedBytes = snapshotBuffer.duplicate();
    expectedBytes.limit(Math.toIntExact(snapshot.sizeInBytes() / 2));
    assertEquals(expectedBytes, responseBuffer.duplicate().flip());
    // Fetch the remainder of the snapshot
    context.deliverRequest(fetchSnapshotRequest(context.metadataPartition, epoch, snapshotId, Integer.MAX_VALUE, responseBuffer.position()));
    context.client.poll();
    response = context.assertSentFetchSnapshotResponse(context.metadataPartition).get();
    assertEquals(Errors.NONE, Errors.forCode(response.errorCode()));
    assertEquals(snapshot.sizeInBytes(), response.size());
    assertEquals(responseBuffer.position(), response.position());
    assertEquals(snapshot.sizeInBytes() - (snapshot.sizeInBytes() / 2), response.unalignedRecords().sizeInBytes());
    responseBuffer.put(((UnalignedMemoryRecords) response.unalignedRecords()).buffer());
    assertEquals(snapshotBuffer, responseBuffer.flip());
}
Also used: FetchSnapshotResponseData(org.apache.kafka.common.message.FetchSnapshotResponseData) RawSnapshotReader(org.apache.kafka.snapshot.RawSnapshotReader) ByteBuffer(java.nio.ByteBuffer) UnalignedMemoryRecords(org.apache.kafka.common.record.UnalignedMemoryRecords) SnapshotWriterReaderTest(org.apache.kafka.snapshot.SnapshotWriterReaderTest) Test(org.junit.jupiter.api.Test)
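
A minimal sketch of the final reassembly check: write both chunks into one buffer, then flip before comparing, because ByteBuffer.equals only examines the bytes between position and limit (assuming JUnit 5's assertEquals, as in the test):

ByteBuffer original = ByteBuffer.wrap(new byte[] {1, 2, 3, 4});
ByteBuffer reassembled = ByteBuffer.allocate(original.remaining());
reassembled.put(ByteBuffer.wrap(new byte[] {1, 2}));  // first chunk
reassembled.put(ByteBuffer.wrap(new byte[] {3, 4}));  // second chunk
// flip() moves the limit to the current position and resets position to 0,
// so the comparison sees exactly the bytes that were written.
assertEquals(original, reassembled.flip());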

Aggregations

RawSnapshotReader (org.apache.kafka.snapshot.RawSnapshotReader): 9
Test (org.junit.jupiter.api.Test): 7
SnapshotWriterReaderTest (org.apache.kafka.snapshot.SnapshotWriterReaderTest): 5
FetchSnapshotResponseData (org.apache.kafka.common.message.FetchSnapshotResponseData): 4
FetchSnapshotRequestData (org.apache.kafka.common.message.FetchSnapshotRequestData): 3
ByteBuffer (java.nio.ByteBuffer): 2
UnalignedMemoryRecords (org.apache.kafka.common.record.UnalignedMemoryRecords): 2
HashMap (java.util.HashMap): 1
OptionalLong (java.util.OptionalLong): 1
ExecutionException (java.util.concurrent.ExecutionException): 1
TopicPartition (org.apache.kafka.common.TopicPartition): 1
Uuid (org.apache.kafka.common.Uuid): 1
AllocateProducerIdsRequestData (org.apache.kafka.common.message.AllocateProducerIdsRequestData): 1
BrokerHeartbeatRequestData (org.apache.kafka.common.message.BrokerHeartbeatRequestData): 1
BrokerRegistrationRequestData (org.apache.kafka.common.message.BrokerRegistrationRequestData): 1
Listener (org.apache.kafka.common.message.BrokerRegistrationRequestData.Listener): 1
ListenerCollection (org.apache.kafka.common.message.BrokerRegistrationRequestData.ListenerCollection): 1
CreateTopicsRequestData (org.apache.kafka.common.message.CreateTopicsRequestData): 1
CreatableReplicaAssignment (org.apache.kafka.common.message.CreateTopicsRequestData.CreatableReplicaAssignment): 1
CreatableReplicaAssignmentCollection (org.apache.kafka.common.message.CreateTopicsRequestData.CreatableReplicaAssignmentCollection): 1