Search in sources :

Example 1 with KafkaStreamMetadata

use of com.linkedin.pinot.common.metadata.stream.KafkaStreamMetadata in project pinot by linkedin.

the class PinotHelixResourceManager method ensureRealtimeClusterIsSetUp.

/**
 * Creates whatever Helix entries are still missing for a realtime table, based on the consumer
 * types (high-level and/or low-level "simple") declared in the table's stream configs.
 *
 * <ul>
 *   <li>High-level only: (re)creates the high-level consumer entries and cleans up any LLC state.</li>
 *   <li>Both: creates the high-level consumer entries only when no ideal state exists yet, then
 *       continues with the low-level setup.</li>
 *   <li>Low-level (alone or with high-level): builds the low-level ideal state unless the Kafka
 *       partition assignment path already exists in the property store.</li>
 * </ul>
 *
 * @param config table config handed to the ideal-state builders
 * @param realtimeTableName name of the realtime table (used as the Helix resource name)
 * @param indexingConfig indexing config whose stream configs describe the Kafka consumer types
 */
private void ensureRealtimeClusterIsSetUp(AbstractTableConfig config, String realtimeTableName, IndexingConfig indexingConfig) {
    final KafkaStreamMetadata streamMetadata = new KafkaStreamMetadata(indexingConfig.getStreamConfigs());
    IdealState currentIdealState = _helixAdmin.getResourceIdealState(_helixClusterName, realtimeTableName);

    if (streamMetadata.hasHighLevelKafkaConsumerType()) {
        if (!streamMetadata.hasSimpleKafkaConsumerType()) {
            // The config asks for the high-level consumer only.
            createHelixEntriesForHighLevelConsumer(config, realtimeTableName, currentIdealState);
            // Drop any leftover low-level consumer state for this table.
            PinotLLCRealtimeSegmentManager.getInstance().cleanupLLC(realtimeTableName);
        } else if (currentIdealState == null) {
            // Both consumer types requested and nothing exists yet: set up the high-level consumer
            // first, then re-read the ideal state so the low-level setup below sees it.
            createHelixEntriesForHighLevelConsumer(config, realtimeTableName, currentIdealState);
            currentIdealState = _helixAdmin.getResourceIdealState(_helixClusterName, realtimeTableName);
            LOGGER.info("Configured new HLC for table {}", realtimeTableName);
        }
        // Otherwise both types are requested and an ideal state already exists; we may only be
        // adding the low-level consumer, which is handled below.
    }

    if (!streamMetadata.hasSimpleKafkaConsumerType()) {
        // No low-level consumer requested, nothing more to do.
        return;
    }

    // Low-level consumer requested (alone, or together with high-level). The presence of the Kafka
    // partition assignment node is used as the marker that LLC has already been configured.
    final String partitionAssignmentPath = ZKMetadataProvider.constructPropertyStorePathForKafkaPartitions(realtimeTableName);
    if (_propertyStore.exists(partitionAssignmentPath, AccessOption.PERSISTENT)) {
        LOGGER.info("LLC is already set up for table {}, not configuring again", realtimeTableName);
        return;
    }
    PinotTableIdealStateBuilder.buildLowLevelRealtimeIdealStateFor(realtimeTableName, config, _helixAdmin, _helixClusterName, currentIdealState);
    LOGGER.info("Successfully added Helix entries for low-level consumers for {} ", realtimeTableName);
}
Also used : KafkaStreamMetadata(com.linkedin.pinot.common.metadata.stream.KafkaStreamMetadata) IdealState(org.apache.helix.model.IdealState)

Example 2 with KafkaStreamMetadata

use of com.linkedin.pinot.common.metadata.stream.KafkaStreamMetadata in project pinot by linkedin.

the class ValidationManager method runValidation.

/**
 * Runs a validation pass over the currently loaded tables.
 *
 * <p>The pass is skipped entirely when this controller is not the leader. Otherwise, for every
 * table name returned by the resource manager, the broker resource is rebuilt from Helix tags and:
 * <ul>
 *   <li>offline tables are handed to {@code validateOfflineSegmentPush};</li>
 *   <li>realtime tables have their total-document gauge updated and, when the stream config
 *       declares the simple (low-level) consumer, their LLC segments validated;</li>
 *   <li>any other table type is logged and ignored.</li>
 * </ul>
 * Failures while validating a realtime table are logged and do not stop the pass.
 */
public void runValidation() {
    if (!_pinotHelixResourceManager.isLeader()) {
        LOGGER.info("Skipping validation, not leader!");
        return;
    }
    LOGGER.info("Starting validation");
    // Fetch the list of tables
    List<String> allTableNames = _pinotHelixResourceManager.getAllPinotTableNames();
    ZkHelixPropertyStore<ZNRecord> propertyStore = _pinotHelixResourceManager.getPropertyStore();
    for (String tableName : allTableNames) {
        List<SegmentMetadata> segmentMetadataList = new ArrayList<SegmentMetadata>();
        TableType tableType = TableNameBuilder.getTableTypeFromTableName(tableName);
        AbstractTableConfig tableConfig = null;
        _pinotHelixResourceManager.rebuildBrokerResourceFromHelixTags(tableName);
        // For each table, fetch the metadata for all its segments.
        // Enum identity comparison is used so that an undetermined (null) table type falls through
        // to the "ignoring" branch instead of throwing and aborting the whole pass.
        if (tableType == TableType.OFFLINE) {
            validateOfflineSegmentPush(propertyStore, tableName, segmentMetadataList);
        } else if (tableType == TableType.REALTIME) {
            LOGGER.info("Starting to validate table {}", tableName);
            List<RealtimeSegmentZKMetadata> realtimeSegmentZKMetadatas = ZKMetadataProvider.getRealtimeSegmentZKMetadataListForTable(propertyStore, tableName);
            // false if this table has ONLY LLC segments (i.e. fully migrated)
            boolean countHLCSegments = true;
            KafkaStreamMetadata streamMetadata = null;
            try {
                tableConfig = _pinotHelixResourceManager.getRealtimeTableConfig(tableName);
                streamMetadata = new KafkaStreamMetadata(tableConfig.getIndexingConfig().getStreamConfigs());
                if (streamMetadata.hasSimpleKafkaConsumerType() && !streamMetadata.hasHighLevelKafkaConsumerType()) {
                    countHLCSegments = false;
                }
                for (RealtimeSegmentZKMetadata realtimeSegmentZKMetadata : realtimeSegmentZKMetadatas) {
                    SegmentMetadata segmentMetadata = new SegmentMetadataImpl(realtimeSegmentZKMetadata);
                    segmentMetadataList.add(segmentMetadata);
                }
                // Update the gauge to contain the total document count in the segments
                _validationMetrics.updateTotalDocumentsGauge(tableName, computeRealtimeTotalDocumentInSegments(segmentMetadataList, countHLCSegments));
                if (streamMetadata.hasSimpleKafkaConsumerType()) {
                    validateLLCSegments(tableName, tableConfig);
                }
            } catch (Exception e) {
                // Which variable is still null tells us how far we got before the failure. The
                // exception is always attached so the underlying cause is not lost.
                if (tableConfig == null) {
                    LOGGER.warn("Cannot get realtime tableconfig for {}", tableName, e);
                } else if (streamMetadata == null) {
                    LOGGER.warn("Cannot get streamconfig for {}", tableName, e);
                } else {
                    LOGGER.error("Exception while validating table {}", tableName, e);
                }
            }
        } else {
            LOGGER.warn("Ignoring table type {} for table {}", tableType, tableName);
        }
    }
    LOGGER.info("Validation completed");
}
Also used : KafkaStreamMetadata(com.linkedin.pinot.common.metadata.stream.KafkaStreamMetadata) TableType(com.linkedin.pinot.common.utils.CommonConstants.Helix.TableType) ArrayList(java.util.ArrayList) SegmentMetadata(com.linkedin.pinot.common.segment.SegmentMetadata) RealtimeSegmentZKMetadata(com.linkedin.pinot.common.metadata.segment.RealtimeSegmentZKMetadata) ArrayList(java.util.ArrayList) List(java.util.List) SegmentMetadataImpl(com.linkedin.pinot.core.segment.index.SegmentMetadataImpl) AbstractTableConfig(com.linkedin.pinot.common.config.AbstractTableConfig) ZNRecord(org.apache.helix.ZNRecord)

Example 3 with KafkaStreamMetadata

use of com.linkedin.pinot.common.metadata.stream.KafkaStreamMetadata in project pinot by linkedin.

the class PinotRealtimeSegmentManager method assignRealtimeSegmentsToServerInstancesIfNecessary.

/**
 * Assigns a new high-level-consumer realtime segment to every server instance that is not
 * currently consuming one.
 *
 * <p>Works in three phases:
 * <ol>
 *   <li>Snapshot the ideal state of every realtime table whose stream config declares the
 *       high-level Kafka consumer.</li>
 *   <li>For each such table, compute (segment name, instance) pairs: all instances when the ideal
 *       state has no partitions yet, otherwise only instances that do not host a segment whose
 *       ZK metadata status is {@code IN_PROGRESS}.</li>
 *   <li>For each pair not already present in the snapshot, write the segment ZK metadata and add
 *       the segment to the ideal state.</li>
 * </ol>
 * Per-resource and per-segment failures are logged, metered, and skipped.
 *
 * @throws JSONException declared by the signature; not thrown directly by the visible body
 * @throws IOException declared by the signature; not thrown directly by the visible body
 */
private synchronized void assignRealtimeSegmentsToServerInstancesIfNecessary() throws JSONException, IOException {
    // Fetch current ideal state snapshot
    Map<String, IdealState> idealStateMap = new HashMap<String, IdealState>();
    for (String resource : _pinotHelixResourceManager.getAllRealtimeTables()) {
        final String tableName = TableNameBuilder.extractRawTableName(resource);
        AbstractTableConfig tableConfig = _pinotHelixResourceManager.getTableConfig(tableName, TableType.REALTIME);
        KafkaStreamMetadata metadata = new KafkaStreamMetadata(tableConfig.getIndexingConfig().getStreamConfigs());
        if (metadata.hasHighLevelKafkaConsumerType()) {
            idealStateMap.put(resource, _pinotHelixResourceManager.getHelixAdmin().getResourceIdealState(_pinotHelixResourceManager.getHelixClusterName(), resource));
        } else {
            // The placeholder previously had no argument, so the table name was never logged.
            LOGGER.debug("Not considering table {} for realtime segment assignment", resource);
        }
    }
    List<Pair<String, String>> listOfSegmentsToAddToInstances = new ArrayList<Pair<String, String>>();
    for (String resource : idealStateMap.keySet()) {
        try {
            IdealState state = idealStateMap.get(resource);
            // Are there any partitions?
            if (state.getPartitionSet().size() == 0) {
                // No, this is a brand new ideal state, so we will add one new segment to every partition and replica
                List<String> instancesInResource = new ArrayList<String>();
                try {
                    instancesInResource.addAll(_pinotHelixResourceManager.getServerInstancesForTable(resource, TableType.REALTIME));
                } catch (Exception e) {
                    LOGGER.error("Caught exception while fetching instances for resource {}", resource, e);
                    _controllerMetrics.addMeteredGlobalValue(ControllerMeter.CONTROLLER_REALTIME_TABLE_SEGMENT_ASSIGNMENT_ERROR, 1L);
                }
                // Assign a new segment to all server instances
                for (String instanceId : instancesInResource) {
                    InstanceZKMetadata instanceZKMetadata = _pinotHelixResourceManager.getInstanceZKMetadata(instanceId);
                    if (instanceZKMetadata == null) {
                        LOGGER.warn("Instance {} has no associated instance metadata in ZK, ignoring for segment assignment.", instanceId);
                        _controllerMetrics.addMeteredGlobalValue(ControllerMeter.CONTROLLER_REALTIME_TABLE_SEGMENT_ASSIGNMENT_ERROR, 1L);
                        continue;
                    }
                    String groupId = instanceZKMetadata.getGroupId(resource);
                    String partitionId = instanceZKMetadata.getPartition(resource);
                    if (groupId != null && !groupId.isEmpty() && partitionId != null && !partitionId.isEmpty()) {
                        listOfSegmentsToAddToInstances.add(new Pair<String, String>(new HLCSegmentName(groupId, partitionId, String.valueOf(System.currentTimeMillis())).getSegmentName(), instanceId));
                    } else {
                        LOGGER.warn("Instance {} has invalid groupId ({}) and/or partitionId ({}) for resource {}, ignoring for segment assignment.", instanceId, groupId, partitionId, resource);
                        _controllerMetrics.addMeteredGlobalValue(ControllerMeter.CONTROLLER_REALTIME_TABLE_SEGMENT_ASSIGNMENT_ERROR, 1L);
                    }
                }
            } else {
                // Add all server instances to the list of instances for which to assign a realtime segment
                Set<String> instancesToAssignRealtimeSegment = new HashSet<String>();
                try {
                    instancesToAssignRealtimeSegment.addAll(_pinotHelixResourceManager.getServerInstancesForTable(resource, TableType.REALTIME));
                } catch (Exception e) {
                    LOGGER.error("Caught exception while fetching instances for resource {}", resource, e);
                    _controllerMetrics.addMeteredGlobalValue(ControllerMeter.CONTROLLER_REALTIME_TABLE_SEGMENT_ASSIGNMENT_ERROR, 1L);
                }
                // Remove server instances that are currently processing a segment
                for (String partition : state.getPartitionSet()) {
                    // Helix partition is the segment name
                    if (SegmentName.isHighLevelConsumerSegmentName(partition)) {
                        HLCSegmentName segName = new HLCSegmentName(partition);
                        RealtimeSegmentZKMetadata realtimeSegmentZKMetadata = ZKMetadataProvider.getRealtimeSegmentZKMetadata(_pinotHelixResourceManager.getPropertyStore(), segName.getTableName(), partition);
                        if (realtimeSegmentZKMetadata == null) {
                            // Segment was deleted by retention manager.
                            continue;
                        }
                        if (realtimeSegmentZKMetadata.getStatus() == Status.IN_PROGRESS) {
                            instancesToAssignRealtimeSegment.removeAll(state.getInstanceSet(partition));
                        }
                    }
                }
                // Assign a new segment to the server instances not currently processing this segment
                for (String instanceId : instancesToAssignRealtimeSegment) {
                    InstanceZKMetadata instanceZKMetadata = _pinotHelixResourceManager.getInstanceZKMetadata(instanceId);
                    if (instanceZKMetadata == null) {
                        // Same handling as the brand-new ideal state branch: skip only this instance
                        // rather than letting an NPE abort the remaining instances of the resource.
                        LOGGER.warn("Instance {} has no associated instance metadata in ZK, ignoring for segment assignment.", instanceId);
                        _controllerMetrics.addMeteredGlobalValue(ControllerMeter.CONTROLLER_REALTIME_TABLE_SEGMENT_ASSIGNMENT_ERROR, 1L);
                        continue;
                    }
                    String groupId = instanceZKMetadata.getGroupId(resource);
                    String partitionId = instanceZKMetadata.getPartition(resource);
                    listOfSegmentsToAddToInstances.add(new Pair<String, String>(new HLCSegmentName(groupId, partitionId, String.valueOf(System.currentTimeMillis())).getSegmentName(), instanceId));
                }
            }
        } catch (Exception e) {
            LOGGER.warn("Caught exception while processing resource {}, skipping.", resource, e);
            _controllerMetrics.addMeteredGlobalValue(ControllerMeter.CONTROLLER_REALTIME_TABLE_SEGMENT_ASSIGNMENT_ERROR, 1L);
        }
    }
    LOGGER.info("Computed list of new segments to add : " + Arrays.toString(listOfSegmentsToAddToInstances.toArray()));
    // Add the new segments to the server instances
    for (final Pair<String, String> segmentIdAndInstanceId : listOfSegmentsToAddToInstances) {
        final String segmentId = segmentIdAndInstanceId.getFirst();
        final String instanceName = segmentIdAndInstanceId.getSecond();
        try {
            final HLCSegmentName segName = new HLCSegmentName(segmentId);
            String resourceName = segName.getTableName();
            // Does the ideal state already contain this segment?
            if (!idealStateMap.get(resourceName).getPartitionSet().contains(segmentId)) {
                // No, add it
                // Create the realtime segment metadata
                RealtimeSegmentZKMetadata realtimeSegmentMetadataToAdd = new RealtimeSegmentZKMetadata();
                realtimeSegmentMetadataToAdd.setTableName(TableNameBuilder.extractRawTableName(resourceName));
                realtimeSegmentMetadataToAdd.setSegmentType(SegmentType.REALTIME);
                realtimeSegmentMetadataToAdd.setStatus(Status.IN_PROGRESS);
                realtimeSegmentMetadataToAdd.setSegmentName(segmentId);
                // Add the new metadata to the property store
                ZKMetadataProvider.setRealtimeSegmentZKMetadata(_pinotHelixResourceManager.getPropertyStore(), realtimeSegmentMetadataToAdd);
                // Update the ideal state to add the new realtime segment
                HelixHelper.updateIdealState(_pinotHelixResourceManager.getHelixZkManager(), resourceName, new Function<IdealState, IdealState>() {

                    @Override
                    public IdealState apply(IdealState idealState) {
                        return PinotTableIdealStateBuilder.addNewRealtimeSegmentToIdealState(segmentId, idealState, instanceName);
                    }
                }, RetryPolicies.exponentialBackoffRetryPolicy(5, 500L, 2.0f));
            }
        } catch (Exception e) {
            LOGGER.warn("Caught exception while processing segment {} for instance {}, skipping.", segmentId, instanceName, e);
            _controllerMetrics.addMeteredGlobalValue(ControllerMeter.CONTROLLER_REALTIME_TABLE_SEGMENT_ASSIGNMENT_ERROR, 1L);
        }
    }
}
Also used : HLCSegmentName(com.linkedin.pinot.common.utils.HLCSegmentName) KafkaStreamMetadata(com.linkedin.pinot.common.metadata.stream.KafkaStreamMetadata) HashMap(java.util.HashMap) InstanceZKMetadata(com.linkedin.pinot.common.metadata.instance.InstanceZKMetadata) ArrayList(java.util.ArrayList) IdealState(org.apache.helix.model.IdealState) JSONException(org.json.JSONException) IOException(java.io.IOException) RealtimeSegmentZKMetadata(com.linkedin.pinot.common.metadata.segment.RealtimeSegmentZKMetadata) AbstractTableConfig(com.linkedin.pinot.common.config.AbstractTableConfig) Pair(com.linkedin.pinot.core.query.utils.Pair) HashSet(java.util.HashSet)

Example 4 with KafkaStreamMetadata

use of com.linkedin.pinot.common.metadata.stream.KafkaStreamMetadata in project pinot by linkedin.

the class PinotRealtimeSegmentManager method refreshWatchers.

/**
 * Re-registers ZooKeeper watches for realtime tables that use the high-level Kafka consumer.
 * Calling it repeatedly is intended to be safe (idempotent). For every such table it:
 * <ul>
 *   <li>subscribes this object to data and child changes on the table's segments path;</li>
 *   <li>for each high-level-consumer segment under that path, subscribes to data changes while
 *       the segment status is {@code IN_PROGRESS} and unsubscribes otherwise.</li>
 * </ul>
 * A failure while handling one table config is logged and does not prevent the remaining tables
 * from being processed.
 *
 * @param path path for which the change notification was received; only used for logging
 */
private void refreshWatchers(String path) {
    LOGGER.info("Received change notification for path: {}", path);
    List<Stat> stats = new ArrayList<>();
    List<ZNRecord> tableConfigs = _pinotHelixResourceManager.getPropertyStore().getChildren(TABLE_CONFIG, stats, 0);
    if (tableConfigs == null) {
        return;
    }
    for (ZNRecord tableConfigZnRecord : tableConfigs) {
        try {
            String recordId = tableConfigZnRecord.getId();
            if (TableNameBuilder.getTableTypeFromTableName(recordId) != TableType.REALTIME) {
                // Only realtime tables carry realtime segment watches.
                continue;
            }
            AbstractTableConfig tableConfig = AbstractTableConfig.fromZnRecord(tableConfigZnRecord);
            KafkaStreamMetadata streamMetadata = new KafkaStreamMetadata(tableConfig.getIndexingConfig().getStreamConfigs());
            if (!streamMetadata.hasHighLevelKafkaConsumerType()) {
                // Tables without the high-level consumer are not watched here.
                continue;
            }

            String realtimeTable = tableConfig.getTableName();
            String tableSegmentsZkPath = _propertyStorePath + SEGMENTS_PATH + "/" + realtimeTable;
            LOGGER.info("Setting data/child changes watch for real-time table '{}'", realtimeTable);
            _zkClient.subscribeDataChanges(tableSegmentsZkPath, this);
            _zkClient.subscribeChildChanges(tableSegmentsZkPath, this);

            List<String> segmentNames = _pinotHelixResourceManager.getPropertyStore().getChildNames(SEGMENTS_PATH + "/" + realtimeTable, 0);
            if (segmentNames == null || segmentNames.isEmpty()) {
                continue;
            }
            for (String segmentName : segmentNames) {
                if (!SegmentName.isHighLevelConsumerSegmentName(segmentName)) {
                    continue;
                }
                String segmentZkPath = tableSegmentsZkPath + "/" + segmentName;
                RealtimeSegmentZKMetadata segmentZKMetadata = ZKMetadataProvider.getRealtimeSegmentZKMetadata(_pinotHelixResourceManager.getPropertyStore(), tableConfig.getTableName(), segmentName);
                if (segmentZKMetadata == null) {
                    // Metadata is gone: the retention manager removed this segment.
                    continue;
                }
                boolean stillConsuming = segmentZKMetadata.getStatus() == Status.IN_PROGRESS;
                if (!stillConsuming) {
                    _zkClient.unsubscribeDataChanges(segmentZkPath, this);
                } else {
                    LOGGER.info("Setting data change watch for real-time segment currently being consumed: {}", segmentZkPath);
                    _zkClient.subscribeDataChanges(segmentZkPath, this);
                }
            }
        } catch (Exception e) {
            // Keep going: a problem with one table must not stop watches being set for the others.
            LOGGER.error("Caught exception while processing ZNRecord id: {}. Skipping node to continue setting watches", tableConfigZnRecord.getId(), e);
        }
    }
}
Also used : RealtimeSegmentZKMetadata(com.linkedin.pinot.common.metadata.segment.RealtimeSegmentZKMetadata) KafkaStreamMetadata(com.linkedin.pinot.common.metadata.stream.KafkaStreamMetadata) Stat(org.apache.zookeeper.data.Stat) ArrayList(java.util.ArrayList) AbstractTableConfig(com.linkedin.pinot.common.config.AbstractTableConfig) ZNRecord(org.apache.helix.ZNRecord) JSONException(org.json.JSONException) IOException(java.io.IOException)

Example 5 with KafkaStreamMetadata

use of com.linkedin.pinot.common.metadata.stream.KafkaStreamMetadata in project pinot by linkedin.

the class PinotLLCRealtimeSegmentManager method updateKafkaPartitionsIfNecessary.

/**
 * Update the kafka partitions as necessary to accommodate changes in number of replicas, number of tenants or
 * number of kafka partitions. As new segments are assigned, they will obey the new kafka partition assignment.
 *
 * <p>The previous state (partition count, instance set, replica count) is read from the stored
 * Kafka partition assignment; the current state comes from Kafka, the realtime server tenant and
 * the table config. The assignment is regenerated and written only when at least one of the three
 * differs and there are enough instances to host the requested replica count. If the current
 * partition count cannot be fetched, the stored assignment is left untouched.
 *
 * @param realtimeTableName name of the realtime table
 * @param tableConfig tableConfig from propertystore
 */
public void updateKafkaPartitionsIfNecessary(String realtimeTableName, AbstractTableConfig tableConfig) {
    final ZNRecord partitionAssignment = getKafkaPartitionAssignment(realtimeTableName);
    final Map<String, List<String>> partitionToServersMap = partitionAssignment.getListFields();
    final KafkaStreamMetadata kafkaStreamMetadata = new KafkaStreamMetadata(tableConfig.getIndexingConfig().getStreamConfigs());
    final String realtimeServerTenantName = ControllerTenantNameBuilder.getRealtimeTenantNameForTenant(tableConfig.getTenantConfig().getServer());
    final List<String> currentInstances = getInstances(realtimeServerTenantName);
    // Previous partition count is what we find in the Kafka partition assignment znode.
    // Get the current partition count from Kafka.
    final int prevPartitionCount = partitionToServersMap.size();
    final int currentPartitionCount;
    try {
        currentPartitionCount = getKafkaPartitionCount(kafkaStreamMetadata);
    } catch (Exception e) {
        // Report the count that stays in effect (the previous one) and keep the cause in the log.
        LOGGER.warn("Could not get partition count for {}. Leaving kafka partition count at {}", realtimeTableName, prevPartitionCount, e);
        return;
    }
    // Previous instance set is what we find in the Kafka partition assignment znode (values of the map entries)
    final Set<String> prevInstances = new HashSet<>(currentInstances.size());
    for (List<String> servers : partitionToServersMap.values()) {
        prevInstances.addAll(servers);
    }
    // Replica count is taken from the first partition's server list. An empty assignment is
    // treated as zero replicas so that it is detected as a change below instead of throwing
    // NoSuchElementException.
    final int prevReplicaCount = partitionToServersMap.isEmpty() ? 0 : partitionToServersMap.values().iterator().next().size();
    final int currentReplicaCount = Integer.valueOf(tableConfig.getValidationConfig().getReplicasPerPartition());
    boolean updateKafkaAssignment = false;
    if (!prevInstances.equals(new HashSet<String>(currentInstances))) {
        LOGGER.info("Detected change in instances for table {}", realtimeTableName);
        updateKafkaAssignment = true;
    }
    if (prevPartitionCount != currentPartitionCount) {
        LOGGER.info("Detected change in Kafka partition count for table {} from {} to {}", realtimeTableName, prevPartitionCount, currentPartitionCount);
        updateKafkaAssignment = true;
    }
    if (prevReplicaCount != currentReplicaCount) {
        LOGGER.info("Detected change in per-partition replica count for table {} from {} to {}", realtimeTableName, prevReplicaCount, currentReplicaCount);
        updateKafkaAssignment = true;
    }
    if (!updateKafkaAssignment) {
        LOGGER.info("Not updating Kafka partition assignment for table {}", realtimeTableName);
        return;
    }
    // Generate new kafka partition assignment and update the znode
    if (currentInstances.size() < currentReplicaCount) {
        LOGGER.error("Cannot have {} replicas in {} instances for {}.Not updating partition assignment", currentReplicaCount, currentInstances.size(), realtimeTableName);
        return;
    }
    ZNRecord newPartitionAssignment = generatePartitionAssignment(kafkaStreamMetadata.getKafkaTopicName(), currentPartitionCount, currentInstances, currentReplicaCount);
    writeKafkaPartitionAssignemnt(realtimeTableName, newPartitionAssignment);
    LOGGER.info("Successfully updated Kafka partition assignment for table {}", realtimeTableName);
}
Also used : KafkaStreamMetadata(com.linkedin.pinot.common.metadata.stream.KafkaStreamMetadata) List(java.util.List) ArrayList(java.util.ArrayList) ZNRecord(org.apache.helix.ZNRecord) TimeoutException(java.util.concurrent.TimeoutException) HashSet(java.util.HashSet)

Aggregations

KafkaStreamMetadata (com.linkedin.pinot.common.metadata.stream.KafkaStreamMetadata)9 ArrayList (java.util.ArrayList)5 ZNRecord (org.apache.helix.ZNRecord)5 AbstractTableConfig (com.linkedin.pinot.common.config.AbstractTableConfig)4 RealtimeSegmentZKMetadata (com.linkedin.pinot.common.metadata.segment.RealtimeSegmentZKMetadata)3 HashMap (java.util.HashMap)3 HashSet (java.util.HashSet)3 IdealState (org.apache.helix.model.IdealState)3 LLCSegmentName (com.linkedin.pinot.common.utils.LLCSegmentName)2 IOException (java.io.IOException)2 List (java.util.List)2 TimeoutException (java.util.concurrent.TimeoutException)2 JSONException (org.json.JSONException)2 IndexingConfig (com.linkedin.pinot.common.config.IndexingConfig)1 InstanceZKMetadata (com.linkedin.pinot.common.metadata.instance.InstanceZKMetadata)1 LLCRealtimeSegmentZKMetadata (com.linkedin.pinot.common.metadata.segment.LLCRealtimeSegmentZKMetadata)1 SegmentMetadata (com.linkedin.pinot.common.segment.SegmentMetadata)1 TableType (com.linkedin.pinot.common.utils.CommonConstants.Helix.TableType)1 HLCSegmentName (com.linkedin.pinot.common.utils.HLCSegmentName)1 PinotLLCRealtimeSegmentManager (com.linkedin.pinot.controller.helix.core.realtime.PinotLLCRealtimeSegmentManager)1