Search in sources :

Example 16 with ZNRecord

use of org.apache.helix.ZNRecord in project pinot by linkedin.

the class HelixSetupUtils method initPropertyStorePath.

/**
 * Creates the fixed set of top-level znodes under the property store of the given Helix cluster.
 *
 * <p>Each node is created with an empty-id {@link ZNRecord} and {@code AccessOption.PERSISTENT}.
 *
 * @param helixClusterName cluster name used to resolve the property store path
 * @param zkPath value passed as the first argument to the {@link ZkHelixPropertyStore} constructor
 */
private static void initPropertyStorePath(String helixClusterName, String zkPath) {
    final String storeRoot = PropertyPathConfig.getPath(PropertyType.PROPERTYSTORE, helixClusterName);
    final ZkHelixPropertyStore<ZNRecord> store =
        new ZkHelixPropertyStore<ZNRecord>(zkPath, new ZNRecordSerializer(), storeRoot);
    // Kept in the original call order: each parent path precedes its children.
    final String[] nodePaths = {
        "/CONFIGS",
        "/CONFIGS/CLUSTER",
        "/CONFIGS/TABLE",
        "/CONFIGS/INSTANCE",
        "/SCHEMAS",
        "/SEGMENTS"
    };
    for (String nodePath : nodePaths) {
        store.create(nodePath, new ZNRecord(""), AccessOption.PERSISTENT);
    }
}
Also used : ZkHelixPropertyStore(org.apache.helix.store.zk.ZkHelixPropertyStore) ZNRecord(org.apache.helix.ZNRecord) ZNRecordSerializer(org.apache.helix.manager.zk.ZNRecordSerializer)

Example 17 with ZNRecord

use of org.apache.helix.ZNRecord in project pinot by linkedin.

the class ValidationManager method runValidation.

/**
 * Runs a validation pass over the currently loaded tables.
 *
 * <p>Returns immediately when this controller is not the leader. Otherwise, for every table name
 * reported by the resource manager, the broker resource is rebuilt from Helix tags and then:
 * <ul>
 *   <li>OFFLINE tables are handed to {@code validateOfflineSegmentPush};</li>
 *   <li>REALTIME tables have their total-document gauge updated and, when the stream config uses
 *       the simple Kafka consumer, their LLC segments validated;</li>
 *   <li>any other (including unresolvable, i.e. {@code null}) table type is logged and skipped.</li>
 * </ul>
 * A failure while validating one realtime table is logged and does not stop the pass.
 */
public void runValidation() {
    if (!_pinotHelixResourceManager.isLeader()) {
        LOGGER.info("Skipping validation, not leader!");
        return;
    }
    LOGGER.info("Starting validation");
    // Fetch the list of tables
    List<String> allTableNames = _pinotHelixResourceManager.getAllPinotTableNames();
    ZkHelixPropertyStore<ZNRecord> propertyStore = _pinotHelixResourceManager.getPropertyStore();
    for (String tableName : allTableNames) {
        List<SegmentMetadata> segmentMetadataList = new ArrayList<SegmentMetadata>();
        TableType tableType = TableNameBuilder.getTableTypeFromTableName(tableName);
        AbstractTableConfig tableConfig = null;
        _pinotHelixResourceManager.rebuildBrokerResourceFromHelixTags(tableName);
        // For each table, fetch the metadata for all its segments.
        // Enum identity comparison is null-safe: a table name with no recognizable type falls through
        // to the "ignoring" branch instead of throwing and aborting the pass for the remaining tables.
        if (tableType == TableType.OFFLINE) {
            validateOfflineSegmentPush(propertyStore, tableName, segmentMetadataList);
        } else if (tableType == TableType.REALTIME) {
            LOGGER.info("Starting to validate table {}", tableName);
            List<RealtimeSegmentZKMetadata> realtimeSegmentZKMetadatas = ZKMetadataProvider.getRealtimeSegmentZKMetadataListForTable(propertyStore, tableName);
            // false if this table has ONLY LLC segments (i.e. fully migrated)
            boolean countHLCSegments = true;
            KafkaStreamMetadata streamMetadata = null;
            try {
                tableConfig = _pinotHelixResourceManager.getRealtimeTableConfig(tableName);
                streamMetadata = new KafkaStreamMetadata(tableConfig.getIndexingConfig().getStreamConfigs());
                if (streamMetadata.hasSimpleKafkaConsumerType() && !streamMetadata.hasHighLevelKafkaConsumerType()) {
                    countHLCSegments = false;
                }
                for (RealtimeSegmentZKMetadata realtimeSegmentZKMetadata : realtimeSegmentZKMetadatas) {
                    SegmentMetadata segmentMetadata = new SegmentMetadataImpl(realtimeSegmentZKMetadata);
                    segmentMetadataList.add(segmentMetadata);
                }
                // Update the gauge to contain the total document count in the segments
                _validationMetrics.updateTotalDocumentsGauge(tableName, computeRealtimeTotalDocumentInSegments(segmentMetadataList, countHLCSegments));
                if (streamMetadata.hasSimpleKafkaConsumerType()) {
                    validateLLCSegments(tableName, tableConfig);
                }
            } catch (Exception e) {
                // tableConfig / streamMetadata still being null tells us how far the try block got.
                // The exception is always passed to the logger so the root cause is not lost.
                if (tableConfig == null) {
                    LOGGER.warn("Cannot get realtime tableconfig for {}", tableName, e);
                } else if (streamMetadata == null) {
                    LOGGER.warn("Cannot get streamconfig for {}", tableName, e);
                } else {
                    LOGGER.error("Exception while validating table {}", tableName, e);
                }
            }
        } else {
            LOGGER.warn("Ignoring table type {} for table {}", tableType, tableName);
        }
    }
    LOGGER.info("Validation completed");
}
Also used : KafkaStreamMetadata(com.linkedin.pinot.common.metadata.stream.KafkaStreamMetadata) TableType(com.linkedin.pinot.common.utils.CommonConstants.Helix.TableType) ArrayList(java.util.ArrayList) SegmentMetadata(com.linkedin.pinot.common.segment.SegmentMetadata) RealtimeSegmentZKMetadata(com.linkedin.pinot.common.metadata.segment.RealtimeSegmentZKMetadata) ArrayList(java.util.ArrayList) List(java.util.List) SegmentMetadataImpl(com.linkedin.pinot.core.segment.index.SegmentMetadataImpl) AbstractTableConfig(com.linkedin.pinot.common.config.AbstractTableConfig) ZNRecord(org.apache.helix.ZNRecord)

Example 18 with ZNRecord

use of org.apache.helix.ZNRecord in project pinot by linkedin.

the class PinotRealtimeSegmentManager method refreshWatchers.

/**
 * Re-registers the ZooKeeper watches related to real-time segments. Intended to be safe to call
 * repeatedly.
 *
 * <p>For every table config found under {@code TABLE_CONFIG} whose name maps to a REALTIME table
 * and whose stream config uses the high-level Kafka consumer:
 * <ul>
 *   <li>a data change listener and a child change listener are subscribed on the table's segments
 *       path;</li>
 *   <li>for each high-level-consumer segment of the table, a data change listener is subscribed
 *       while the segment status is {@code IN_PROGRESS}, and unsubscribed otherwise.</li>
 * </ul>
 * An exception raised while handling one table config is logged and the remaining ones are still
 * processed.
 *
 * @param path path of the change notification that triggered this refresh; only used for logging
 */
private void refreshWatchers(String path) {
    LOGGER.info("Received change notification for path: {}", path);
    List<Stat> childStats = new ArrayList<>();
    List<ZNRecord> tableConfigRecords =
        _pinotHelixResourceManager.getPropertyStore().getChildren(TABLE_CONFIG, childStats, 0);
    if (tableConfigRecords == null) {
        return;
    }
    for (ZNRecord znRecord : tableConfigRecords) {
        try {
            String recordId = znRecord.getId();
            if (TableNameBuilder.getTableTypeFromTableName(recordId) != TableType.REALTIME) {
                continue;
            }
            AbstractTableConfig config = AbstractTableConfig.fromZnRecord(znRecord);
            KafkaStreamMetadata streamMetadata =
                new KafkaStreamMetadata(config.getIndexingConfig().getStreamConfigs());
            if (!streamMetadata.hasHighLevelKafkaConsumerType()) {
                continue;
            }

            // Table-level watches.
            String tableName = config.getTableName();
            String tableSegmentsPath = _propertyStorePath + SEGMENTS_PATH + "/" + tableName;
            LOGGER.info("Setting data/child changes watch for real-time table '{}'", tableName);
            _zkClient.subscribeDataChanges(tableSegmentsPath, this);
            _zkClient.subscribeChildChanges(tableSegmentsPath, this);

            // Segment-level watches.
            List<String> segmentNames =
                _pinotHelixResourceManager.getPropertyStore().getChildNames(SEGMENTS_PATH + "/" + tableName, 0);
            if (segmentNames == null || segmentNames.isEmpty()) {
                continue;
            }
            for (String segmentName : segmentNames) {
                if (!SegmentName.isHighLevelConsumerSegmentName(segmentName)) {
                    continue;
                }
                String segmentPath = tableSegmentsPath + "/" + segmentName;
                RealtimeSegmentZKMetadata segmentZKMetadata = ZKMetadataProvider.getRealtimeSegmentZKMetadata(
                    _pinotHelixResourceManager.getPropertyStore(), config.getTableName(), segmentName);
                if (segmentZKMetadata == null) {
                    // The segment got deleted by retention manager
                    continue;
                }
                if (segmentZKMetadata.getStatus() != Status.IN_PROGRESS) {
                    _zkClient.unsubscribeDataChanges(segmentPath, this);
                } else {
                    LOGGER.info("Setting data change watch for real-time segment currently being consumed: {}", segmentPath);
                    _zkClient.subscribeDataChanges(segmentPath, this);
                }
            }
        } catch (Exception e) {
            // Keep going: a failure on one table must not prevent watches from being set on the others.
            LOGGER.error("Caught exception while processing ZNRecord id: {}. Skipping node to continue setting watches", znRecord.getId(), e);
        }
    }
}
Also used : RealtimeSegmentZKMetadata(com.linkedin.pinot.common.metadata.segment.RealtimeSegmentZKMetadata) KafkaStreamMetadata(com.linkedin.pinot.common.metadata.stream.KafkaStreamMetadata) Stat(org.apache.zookeeper.data.Stat) ArrayList(java.util.ArrayList) AbstractTableConfig(com.linkedin.pinot.common.config.AbstractTableConfig) ZNRecord(org.apache.helix.ZNRecord) JSONException(org.json.JSONException) IOException(java.io.IOException)

Example 19 with ZNRecord

use of org.apache.helix.ZNRecord in project pinot by linkedin.

the class RetentionManager method retrieveSegmentMetadataForTable.

/**
 * Collects the ZK metadata of every segment of the given table.
 *
 * @param tableName table name; its table type is derived via
 *     {@code TableNameBuilder.getTableTypeFromTableName}
 * @return a new list holding the offline or realtime segment ZK metadata of the table, depending on
 *     the table type
 * @throws IllegalArgumentException if no table type can be derived from {@code tableName}
 */
private List<SegmentZKMetadata> retrieveSegmentMetadataForTable(String tableName) {
    List<SegmentZKMetadata> segmentMetadataList = new ArrayList<>();
    ZkHelixPropertyStore<ZNRecord> propertyStore = _pinotHelixResourceManager.getPropertyStore();
    TableType tableType = TableNameBuilder.getTableTypeFromTableName(tableName);
    // An assert is a no-op unless the JVM runs with -ea, in which case a null type would surface as a
    // bare NullPointerException from the switch below. Fail explicitly, with the same exception the
    // default branch uses.
    if (tableType == null) {
        throw new IllegalArgumentException("No table type matches table name: " + tableName);
    }
    switch (tableType) {
        case OFFLINE:
            segmentMetadataList.addAll(ZKMetadataProvider.getOfflineSegmentZKMetadataListForTable(propertyStore, tableName));
            break;
        case REALTIME:
            segmentMetadataList.addAll(ZKMetadataProvider.getRealtimeSegmentZKMetadataListForTable(propertyStore, tableName));
            break;
        default:
            throw new IllegalArgumentException("No table type matches table name: " + tableName);
    }
    return segmentMetadataList;
}
Also used : SegmentZKMetadata(com.linkedin.pinot.common.metadata.segment.SegmentZKMetadata) RealtimeSegmentZKMetadata(com.linkedin.pinot.common.metadata.segment.RealtimeSegmentZKMetadata) OfflineSegmentZKMetadata(com.linkedin.pinot.common.metadata.segment.OfflineSegmentZKMetadata) RealtimeSegmentZKMetadata(com.linkedin.pinot.common.metadata.segment.RealtimeSegmentZKMetadata) TableType(com.linkedin.pinot.common.utils.CommonConstants.Helix.TableType) OfflineSegmentZKMetadata(com.linkedin.pinot.common.metadata.segment.OfflineSegmentZKMetadata) ArrayList(java.util.ArrayList) ZNRecord(org.apache.helix.ZNRecord)

Example 20 with ZNRecord

use of org.apache.helix.ZNRecord in project pinot by linkedin.

the class PinotLLCRealtimeSegmentManager method updateKafkaPartitionsIfNecessary.

/**
 * Update the kafka partitions as necessary to accommodate changes in number of replicas, number of tenants or
 * number of kafka partitions. As new segments are assigned, they will obey the new kafka partition assignment.
 *
 * <p>The stored assignment is compared against the current state on three axes: the set of server
 * instances, the Kafka partition count and the per-partition replica count. If none differs, or if
 * the current partition count cannot be obtained, or if there are fewer instances than requested
 * replicas, the stored assignment is left untouched.
 *
 * @param realtimeTableName name of the realtime table
 * @param tableConfig tableConfig from propertystore
 */
public void updateKafkaPartitionsIfNecessary(String realtimeTableName, AbstractTableConfig tableConfig) {
    final ZNRecord partitionAssignment = getKafkaPartitionAssignment(realtimeTableName);
    final Map<String, List<String>> partitionToServersMap = partitionAssignment.getListFields();
    final KafkaStreamMetadata kafkaStreamMetadata = new KafkaStreamMetadata(tableConfig.getIndexingConfig().getStreamConfigs());
    final String realtimeServerTenantName = ControllerTenantNameBuilder.getRealtimeTenantNameForTenant(tableConfig.getTenantConfig().getServer());
    final List<String> currentInstances = getInstances(realtimeServerTenantName);
    // Previous partition count is what we find in the Kafka partition assignment znode.
    // Get the current partition count from Kafka.
    final int prevPartitionCount = partitionToServersMap.size();
    final int currentPartitionCount;
    try {
        currentPartitionCount = getKafkaPartitionCount(kafkaStreamMetadata);
    } catch (Exception e) {
        // Report the count that is actually being kept (the previous one) and keep the cause in the log.
        LOGGER.warn("Could not get partition count for {}. Leaving kafka partition count at {}", realtimeTableName, prevPartitionCount, e);
        return;
    }
    // Previous instance set is what we find in the Kafka partition assignment znode (values of the map entries)
    final Set<String> prevInstances = new HashSet<>(currentInstances.size());
    for (List<String> servers : partitionToServersMap.values()) {
        prevInstances.addAll(servers);
    }
    // Replica count of the stored assignment, read from its first partition entry. An empty stored
    // assignment has no entry to read, so treat it as zero replicas instead of failing on next().
    final int prevReplicaCount = partitionToServersMap.isEmpty() ? 0 : partitionToServersMap.values().iterator().next().size();
    final int currentReplicaCount = Integer.valueOf(tableConfig.getValidationConfig().getReplicasPerPartition());
    boolean updateKafkaAssignment = false;
    if (!prevInstances.equals(new HashSet<String>(currentInstances))) {
        LOGGER.info("Detected change in instances for table {}", realtimeTableName);
        updateKafkaAssignment = true;
    }
    if (prevPartitionCount != currentPartitionCount) {
        LOGGER.info("Detected change in Kafka partition count for table {} from {} to {}", realtimeTableName, prevPartitionCount, currentPartitionCount);
        updateKafkaAssignment = true;
    }
    if (prevReplicaCount != currentReplicaCount) {
        LOGGER.info("Detected change in per-partition replica count for table {} from {} to {}", realtimeTableName, prevReplicaCount, currentReplicaCount);
        updateKafkaAssignment = true;
    }
    if (!updateKafkaAssignment) {
        LOGGER.info("Not updating Kafka partition assignment for table {}", realtimeTableName);
        return;
    }
    // Generate new kafka partition assignment and update the znode
    if (currentInstances.size() < currentReplicaCount) {
        LOGGER.error("Cannot have {} replicas in {} instances for {}.Not updating partition assignment", currentReplicaCount, currentInstances.size(), realtimeTableName);
        return;
    }
    ZNRecord newPartitionAssignment = generatePartitionAssignment(kafkaStreamMetadata.getKafkaTopicName(), currentPartitionCount, currentInstances, currentReplicaCount);
    writeKafkaPartitionAssignemnt(realtimeTableName, newPartitionAssignment);
    LOGGER.info("Successfully updated Kafka partition assignment for table {}", realtimeTableName);
}
Also used : KafkaStreamMetadata(com.linkedin.pinot.common.metadata.stream.KafkaStreamMetadata) List(java.util.List) ArrayList(java.util.ArrayList) ZNRecord(org.apache.helix.ZNRecord) TimeoutException(java.util.concurrent.TimeoutException) HashSet(java.util.HashSet)

Aggregations

ZNRecord (org.apache.helix.ZNRecord): 80; ArrayList (java.util.ArrayList): 23; IdealState (org.apache.helix.model.IdealState): 15; Test (org.testng.annotations.Test): 13; LLCRealtimeSegmentZKMetadata (com.linkedin.pinot.common.metadata.segment.LLCRealtimeSegmentZKMetadata): 12; HashMap (java.util.HashMap): 12; LLCSegmentName (com.linkedin.pinot.common.utils.LLCSegmentName): 11; AbstractTableConfig (com.linkedin.pinot.common.config.AbstractTableConfig): 10; RealtimeSegmentZKMetadata (com.linkedin.pinot.common.metadata.segment.RealtimeSegmentZKMetadata): 8; HelixAdmin (org.apache.helix.HelixAdmin): 8; ZNRecordSerializer (org.apache.helix.manager.zk.ZNRecordSerializer): 8; HashSet (java.util.HashSet): 6; List (java.util.List): 6; ExternalView (org.apache.helix.model.ExternalView): 6; OfflineSegmentZKMetadata (com.linkedin.pinot.common.metadata.segment.OfflineSegmentZKMetadata): 5; KafkaStreamMetadata (com.linkedin.pinot.common.metadata.stream.KafkaStreamMetadata): 5; BeforeTest (org.testng.annotations.BeforeTest): 5; TableType (com.linkedin.pinot.common.utils.CommonConstants.Helix.TableType): 4; PinotHelixPropertyStoreZnRecordProvider (com.linkedin.pinot.common.utils.helix.PinotHelixPropertyStoreZnRecordProvider): 4; ZkClient (org.apache.helix.manager.zk.ZkClient): 4