Search in sources :

Example 1 with PartitionChangeRecord

use of org.apache.kafka.common.metadata.PartitionChangeRecord in project kafka by apache.

the class ReplicationControlManager method alterIsr.

/**
 * Processes an AlterIsr request and produces the metadata records and the
 * response that result from it.
 *
 * The broker epoch is checked first via clusterControl.checkBrokerEpoch.
 * Every topic in the request then gets an entry in the response, and every
 * partition gets exactly one partition entry. A partition is rejected with:
 * <ul>
 *   <li>UNKNOWN_TOPIC_OR_PARTITION if the topic name or partition index is not known;</li>
 *   <li>FENCED_LEADER_EPOCH if the request's leader epoch differs from the current one;</li>
 *   <li>INVALID_REQUEST if the requester is not the current leader, if
 *       Replicas.validateIsr rejects the new ISR, or if the new ISR does not
 *       contain the current leader;</li>
 *   <li>INVALID_UPDATE_VERSION if the request's ISR version differs from the
 *       current partition epoch.</li>
 * </ul>
 * Accepted partitions are run through PartitionChangeBuilder. If the builder
 * yields a record, it is appended to the result and merged into the partition
 * state reported back. If that record moves leadership away from the requester,
 * the record is still emitted but the partition is answered with
 * FENCED_LEADER_EPOCH.
 *
 * @param request the AlterIsr request received from a broker
 * @return the generated records together with the per-partition response
 */
ControllerResult<AlterIsrResponseData> alterIsr(AlterIsrRequestData request) {
    clusterControl.checkBrokerEpoch(request.brokerId(), request.brokerEpoch());
    AlterIsrResponseData response = new AlterIsrResponseData();
    List<ApiMessageAndVersion> records = new ArrayList<>();
    for (AlterIsrRequestData.TopicData topicData : request.topics()) {
        AlterIsrResponseData.TopicData responseTopicData = new AlterIsrResponseData.TopicData().setName(topicData.name());
        response.topics().add(responseTopicData);
        Uuid topicId = topicsByName.get(topicData.name());
        if (topicId == null || !topics.containsKey(topicId)) {
            for (AlterIsrRequestData.PartitionData partitionData : topicData.partitions()) {
                responseTopicData.partitions().add(alterIsrPartitionError(partitionData.partitionIndex(), UNKNOWN_TOPIC_OR_PARTITION));
            }
            // topicId is null when the name itself is unknown, so log the
            // requested name as well; otherwise the message would only say "null".
            log.info("Rejecting alterIsr request for unknown topic {} (topic ID {}).", topicData.name(), topicId);
            continue;
        }
        TopicControlInfo topic = topics.get(topicId);
        for (AlterIsrRequestData.PartitionData partitionData : topicData.partitions()) {
            int partitionId = partitionData.partitionIndex();
            PartitionRegistration partition = topic.parts.get(partitionId);
            if (partition == null) {
                responseTopicData.partitions().add(alterIsrPartitionError(partitionId, UNKNOWN_TOPIC_OR_PARTITION));
                log.info("Rejecting alterIsr request for unknown partition {}-{}.", topic.name, partitionId);
                continue;
            }
            if (partitionData.leaderEpoch() != partition.leaderEpoch) {
                responseTopicData.partitions().add(alterIsrPartitionError(partitionId, FENCED_LEADER_EPOCH));
                log.debug("Rejecting alterIsr request from node {} for {}-{} because the current leader epoch is {}, not {}.", request.brokerId(), topic.name, partitionId, partition.leaderEpoch, partitionData.leaderEpoch());
                continue;
            }
            if (request.brokerId() != partition.leader) {
                responseTopicData.partitions().add(alterIsrPartitionError(partitionId, INVALID_REQUEST));
                log.info("Rejecting alterIsr request from node {} for {}-{} because the current leader is {}.", request.brokerId(), topic.name, partitionId, partition.leader);
                continue;
            }
            if (partitionData.currentIsrVersion() != partition.partitionEpoch) {
                responseTopicData.partitions().add(alterIsrPartitionError(partitionId, INVALID_UPDATE_VERSION));
                log.info("Rejecting alterIsr request from node {} for {}-{} because the current partition epoch is {}, not {}.", request.brokerId(), topic.name, partitionId, partition.partitionEpoch, partitionData.currentIsrVersion());
                continue;
            }
            int[] newIsr = Replicas.toArray(partitionData.newIsr());
            if (!Replicas.validateIsr(partition.replicas, newIsr)) {
                responseTopicData.partitions().add(alterIsrPartitionError(partitionId, INVALID_REQUEST));
                log.error("Rejecting alterIsr request from node {} for {}-{} because it specified an invalid ISR {}.", request.brokerId(), topic.name, partitionId, partitionData.newIsr());
                continue;
            }
            if (!Replicas.contains(newIsr, partition.leader)) {
                // An alterIsr request can't ask for the current leader to be removed.
                responseTopicData.partitions().add(alterIsrPartitionError(partitionId, INVALID_REQUEST));
                log.error("Rejecting alterIsr request from node {} for {}-{} because it specified an invalid ISR {} that doesn't include itself.", request.brokerId(), topic.name, partitionId, partitionData.newIsr());
                continue;
            }
            // At this point, we have decided to perform the ISR change. We use
            // PartitionChangeBuilder to find out what its effect will be.
            PartitionChangeBuilder builder = new PartitionChangeBuilder(partition, topic.id, partitionId, r -> clusterControl.unfenced(r), () -> configurationControl.uncleanLeaderElectionEnabledForTopic(topicData.name()));
            builder.setTargetIsr(partitionData.newIsr());
            Optional<ApiMessageAndVersion> record = builder.build();
            if (record.isPresent()) {
                records.add(record.get());
                PartitionChangeRecord change = (PartitionChangeRecord) record.get().message();
                // Report the post-change state back to the requester.
                partition = partition.merge(change);
                if (log.isDebugEnabled()) {
                    log.debug("Node {} has altered ISR for {}-{} to {}.", request.brokerId(), topic.name, partitionId, change.isr());
                }
                if (change.leader() != request.brokerId() && change.leader() != NO_LEADER_CHANGE) {
                    // Normally, an alterIsr request, which is made by the partition
                    // leader itself, is not allowed to modify the partition leader.
                    // However, if there is an ongoing partition reassignment and the
                    // ISR change completes it, then the leader may change as part of
                    // the changes made during reassignment cleanup.
                    //
                    // In this case, we report back FENCED_LEADER_EPOCH to the leader
                    // which made the alterIsr request. This lets it know that it must
                    // fetch new metadata before trying again. This return code is
                    // unusual because we both return an error and generate a new
                    // metadata record. We usually only do one or the other.
                    log.info("AlterIsr request from node {} for {}-{} completed the ongoing partition reassignment and triggered a leadership change. Returning FENCED_LEADER_EPOCH.", request.brokerId(), topic.name, partitionId);
                    responseTopicData.partitions().add(alterIsrPartitionError(partitionId, FENCED_LEADER_EPOCH));
                    continue;
                } else if (change.removingReplicas() != null || change.addingReplicas() != null) {
                    log.info("AlterIsr request from node {} for {}-{} completed the ongoing partition reassignment.", request.brokerId(), topic.name, partitionId);
                }
            }
            responseTopicData.partitions().add(new AlterIsrResponseData.PartitionData().setPartitionIndex(partitionId).setErrorCode(Errors.NONE.code()).setLeaderId(partition.leader).setLeaderEpoch(partition.leaderEpoch).setCurrentIsrVersion(partition.partitionEpoch).setIsr(Replicas.toList(partition.isr)));
        }
    }
    return ControllerResult.of(records, response);
}

/**
 * Builds the response entry for a partition whose AlterIsr change is answered
 * with an error; only the partition index and error code are populated.
 */
private static AlterIsrResponseData.PartitionData alterIsrPartitionError(int partitionIndex, Errors error) {
    return new AlterIsrResponseData.PartitionData().setPartitionIndex(partitionIndex).setErrorCode(error.code());
}
Also used : PartitionRegistration(org.apache.kafka.metadata.PartitionRegistration) PartitionChangeRecord(org.apache.kafka.common.metadata.PartitionChangeRecord) ArrayList(java.util.ArrayList) AlterIsrRequestData(org.apache.kafka.common.message.AlterIsrRequestData) Errors(org.apache.kafka.common.protocol.Errors) Uuid(org.apache.kafka.common.Uuid) ApiMessageAndVersion(org.apache.kafka.server.common.ApiMessageAndVersion) AlterIsrResponseData(org.apache.kafka.common.message.AlterIsrResponseData)

Example 2 with PartitionChangeRecord

use of org.apache.kafka.common.metadata.PartitionChangeRecord in project kafka by apache.

the class PartitionChangeBuilder method build.

/**
 * Assembles the PartitionChangeRecord implied by the builder's targets.
 *
 * The record always carries the topic ID and partition ID. After the
 * reassignment-completion, election and leader-epoch-bump steps have run,
 * the ISR and replica lists are recorded only when their target is non-empty
 * and differs from the current registration. The removing and adding lists
 * are recorded whenever they differ from the current registration.
 *
 * @return the versioned change record, or an empty Optional when
 *         changeRecordIsNoOp reports that the record changes nothing
 */
public Optional<ApiMessageAndVersion> build() {
    PartitionChangeRecord change = new PartitionChangeRecord();
    change.setTopicId(topicId);
    change.setPartitionId(partitionId);

    completeReassignmentIfNeeded();
    if (shouldTryElection()) {
        tryElection(change);
    }
    triggerLeaderEpochBumpIfNeeded(change);

    // An empty target ISR / replica list is never written to the record.
    boolean isrUnchanged = targetIsr.isEmpty() || targetIsr.equals(Replicas.toList(partition.isr));
    if (!isrUnchanged) {
        change.setIsr(targetIsr);
    }
    boolean replicasUnchanged = targetReplicas.isEmpty() || targetReplicas.equals(Replicas.toList(partition.replicas));
    if (!replicasUnchanged) {
        change.setReplicas(targetReplicas);
    }

    // Removing / adding lists are written on any difference, empty or not.
    boolean removingUnchanged = targetRemoving.equals(Replicas.toList(partition.removingReplicas));
    if (!removingUnchanged) {
        change.setRemovingReplicas(targetRemoving);
    }
    boolean addingUnchanged = targetAdding.equals(Replicas.toList(partition.addingReplicas));
    if (!addingUnchanged) {
        change.setAddingReplicas(targetAdding);
    }

    if (changeRecordIsNoOp(change)) {
        return Optional.empty();
    }
    ApiMessageAndVersion versioned = new ApiMessageAndVersion(change, PARTITION_CHANGE_RECORD.highestSupportedVersion());
    return Optional.of(versioned);
}
Also used : PartitionChangeRecord(org.apache.kafka.common.metadata.PartitionChangeRecord) ApiMessageAndVersion(org.apache.kafka.server.common.ApiMessageAndVersion)

Example 3 with PartitionChangeRecord

use of org.apache.kafka.common.metadata.PartitionChangeRecord in project kafka by apache.

the class PartitionChangeBuilderTest method testChangeRecordIsNoOp.

/**
 * Checks that changeRecordIsNoOp accepts a freshly constructed record and
 * rejects records that set a leader, an ISR, removing replicas or adding
 * replicas.
 */
@Test
public void testChangeRecordIsNoOp() {
    PartitionChangeRecord untouched = new PartitionChangeRecord();
    assertTrue(changeRecordIsNoOp(untouched));

    PartitionChangeRecord withLeader = new PartitionChangeRecord().setLeader(1);
    assertFalse(changeRecordIsNoOp(withLeader));

    PartitionChangeRecord withIsr = new PartitionChangeRecord().setIsr(Arrays.asList(1, 2, 3));
    assertFalse(changeRecordIsNoOp(withIsr));

    PartitionChangeRecord withRemoving = new PartitionChangeRecord().setRemovingReplicas(Arrays.asList(1));
    assertFalse(changeRecordIsNoOp(withRemoving));

    PartitionChangeRecord withAdding = new PartitionChangeRecord().setAddingReplicas(Arrays.asList(4));
    assertFalse(changeRecordIsNoOp(withAdding));
}
Also used : PartitionChangeRecord(org.apache.kafka.common.metadata.PartitionChangeRecord) Test(org.junit.jupiter.api.Test)

Example 4 with PartitionChangeRecord

use of org.apache.kafka.common.metadata.PartitionChangeRecord in project kafka by apache.

the class PartitionChangeBuilderTest method testAddingReplicaReassignment.

/**
 * Reassigns FOO's replicas to [1, 2, 3, 4] and checks both the computed
 * removing/adding/merged lists and the change record the builder emits
 * when given the merged replicas and the adding list as targets.
 */
@Test
public void testAddingReplicaReassignment() {
    PartitionReassignmentReplicas replicas = new PartitionReassignmentReplicas(
        Replicas.toList(FOO.replicas),
        Arrays.asList(1, 2, 3, 4));
    assertEquals(Collections.emptyList(), replicas.removing());
    assertEquals(Collections.singletonList(4), replicas.adding());
    assertEquals(Arrays.asList(1, 2, 3, 4), replicas.merged());

    // Expected record: new replica set plus the single adding replica.
    PartitionChangeRecord expectedChange = new PartitionChangeRecord()
        .setTopicId(FOO_ID)
        .setPartitionId(0)
        .setReplicas(Arrays.asList(1, 2, 3, 4))
        .setAddingReplicas(Collections.singletonList(4));
    Optional<ApiMessageAndVersion> expected = Optional.of(
        new ApiMessageAndVersion(expectedChange, PARTITION_CHANGE_RECORD.highestSupportedVersion()));

    Optional<ApiMessageAndVersion> actual = createFooBuilder(false)
        .setTargetReplicas(replicas.merged())
        .setTargetAdding(replicas.adding())
        .build();

    assertEquals(expected, actual);
}
Also used : PartitionChangeRecord(org.apache.kafka.common.metadata.PartitionChangeRecord) ApiMessageAndVersion(org.apache.kafka.server.common.ApiMessageAndVersion) Test(org.junit.jupiter.api.Test)

Example 5 with PartitionChangeRecord

use of org.apache.kafka.common.metadata.PartitionChangeRecord in project kafka by apache.

the class PartitionChangeBuilderTest method testRevertReassignment.

@Test
public void testRevertReassignment() {
    PartitionReassignmentRevert revert = new PartitionReassignmentRevert(BAR);
    assertEquals(Arrays.asList(1, 2, 3), revert.replicas());
    assertEquals(Arrays.asList(1, 2, 3), revert.isr());
    assertEquals(Optional.of(new ApiMessageAndVersion(new PartitionChangeRecord().setTopicId(BAR_ID).setPartitionId(0).setReplicas(Arrays.asList(1, 2, 3)).setLeader(1).setRemovingReplicas(Collections.emptyList()).setAddingReplicas(Collections.emptyList()), PARTITION_CHANGE_RECORD.highestSupportedVersion())), createBarBuilder(false).setTargetReplicas(revert.replicas()).setTargetIsr(revert.isr()).setTargetRemoving(Collections.emptyList()).setTargetAdding(Collections.emptyList()).build());
}
Also used : PartitionChangeRecord(org.apache.kafka.common.metadata.PartitionChangeRecord) ApiMessageAndVersion(org.apache.kafka.server.common.ApiMessageAndVersion) Test(org.junit.jupiter.api.Test)

Aggregations

PartitionChangeRecord (org.apache.kafka.common.metadata.PartitionChangeRecord)15 ApiMessageAndVersion (org.apache.kafka.server.common.ApiMessageAndVersion)11 Test (org.junit.jupiter.api.Test)11 Uuid (org.apache.kafka.common.Uuid)8 ArrayList (java.util.ArrayList)5 AlterIsrRequestData (org.apache.kafka.common.message.AlterIsrRequestData)5 AlterIsrResponseData (org.apache.kafka.common.message.AlterIsrResponseData)5 TopicPartition (org.apache.kafka.common.TopicPartition)4 ParameterizedTest (org.junit.jupiter.params.ParameterizedTest)4 PartitionData (org.apache.kafka.common.message.AlterIsrRequestData.PartitionData)3 TopicData (org.apache.kafka.common.message.AlterIsrRequestData.TopicData)3 AlterPartitionReassignmentsRequestData (org.apache.kafka.common.message.AlterPartitionReassignmentsRequestData)3 ReassignablePartition (org.apache.kafka.common.message.AlterPartitionReassignmentsRequestData.ReassignablePartition)3 ReassignableTopic (org.apache.kafka.common.message.AlterPartitionReassignmentsRequestData.ReassignableTopic)3 AlterPartitionReassignmentsResponseData (org.apache.kafka.common.message.AlterPartitionReassignmentsResponseData)3 ReassignablePartitionResponse (org.apache.kafka.common.message.AlterPartitionReassignmentsResponseData.ReassignablePartitionResponse)3 ReassignableTopicResponse (org.apache.kafka.common.message.AlterPartitionReassignmentsResponseData.ReassignableTopicResponse)3 ElectLeadersRequestData (org.apache.kafka.common.message.ElectLeadersRequestData)3 ElectLeadersResponseData (org.apache.kafka.common.message.ElectLeadersResponseData)3 ListPartitionReassignmentsTopics (org.apache.kafka.common.message.ListPartitionReassignmentsRequestData.ListPartitionReassignmentsTopics)3