Use of org.apache.helix.ZNRecord in project pinot by linkedin.
The class HelixSetupUtils, method initPropertyStorePath.
private static void initPropertyStorePath(String helixClusterName, String zkPath) {
  String propertyStorePath = PropertyPathConfig.getPath(PropertyType.PROPERTYSTORE, helixClusterName);
  ZkHelixPropertyStore<ZNRecord> propertyStore =
      new ZkHelixPropertyStore<ZNRecord>(zkPath, new ZNRecordSerializer(), propertyStorePath);
  // Bootstrap the persistent nodes Pinot expects under the cluster's PROPERTYSTORE path
  propertyStore.create("/CONFIGS", new ZNRecord(""), AccessOption.PERSISTENT);
  propertyStore.create("/CONFIGS/CLUSTER", new ZNRecord(""), AccessOption.PERSISTENT);
  propertyStore.create("/CONFIGS/TABLE", new ZNRecord(""), AccessOption.PERSISTENT);
  propertyStore.create("/CONFIGS/INSTANCE", new ZNRecord(""), AccessOption.PERSISTENT);
  propertyStore.create("/SCHEMAS", new ZNRecord(""), AccessOption.PERSISTENT);
  propertyStore.create("/SEGMENTS", new ZNRecord(""), AccessOption.PERSISTENT);
}
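For context, a caller can verify this bootstrap by reading one of the created nodes back through the same property store. A minimal sketch, assuming a reachable ZooKeeper at localhost:2181 and an illustrative cluster name (both are assumptions, not taken from the snippet):

String path = PropertyPathConfig.getPath(PropertyType.PROPERTYSTORE, "myCluster"); // hypothetical cluster name
ZkHelixPropertyStore<ZNRecord> store =
    new ZkHelixPropertyStore<ZNRecord>("localhost:2181", new ZNRecordSerializer(), path); // assumed ZK address
if (store.exists("/CONFIGS/TABLE", AccessOption.PERSISTENT)) {
  // get(path, stat, options) returns the deserialized ZNRecord, or null if absent
  ZNRecord tableConfigs = store.get("/CONFIGS/TABLE", null, AccessOption.PERSISTENT);
  System.out.println("Table config container exists, id: " + tableConfigs.getId());
}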
Use of org.apache.helix.ZNRecord in project pinot by linkedin.
The class ValidationManager, method runValidation.
/**
 * Runs a validation pass over the currently loaded tables.
 */
public void runValidation() {
  if (!_pinotHelixResourceManager.isLeader()) {
    LOGGER.info("Skipping validation, not leader!");
    return;
  }
  LOGGER.info("Starting validation");
  // Fetch the list of tables
  List<String> allTableNames = _pinotHelixResourceManager.getAllPinotTableNames();
  ZkHelixPropertyStore<ZNRecord> propertyStore = _pinotHelixResourceManager.getPropertyStore();
  for (String tableName : allTableNames) {
    List<SegmentMetadata> segmentMetadataList = new ArrayList<SegmentMetadata>();
    TableType tableType = TableNameBuilder.getTableTypeFromTableName(tableName);
    AbstractTableConfig tableConfig = null;
    _pinotHelixResourceManager.rebuildBrokerResourceFromHelixTags(tableName);
    // For each table, fetch the metadata for all its segments
    if (tableType.equals(TableType.OFFLINE)) {
      validateOfflineSegmentPush(propertyStore, tableName, segmentMetadataList);
    } else if (tableType.equals(TableType.REALTIME)) {
      LOGGER.info("Starting to validate table {}", tableName);
      List<RealtimeSegmentZKMetadata> realtimeSegmentZKMetadatas =
          ZKMetadataProvider.getRealtimeSegmentZKMetadataListForTable(propertyStore, tableName);
      // Count HLC segments by default; set to false below if this table has ONLY LLC segments (i.e. fully migrated)
      boolean countHLCSegments = true;
      KafkaStreamMetadata streamMetadata = null;
      try {
        tableConfig = _pinotHelixResourceManager.getRealtimeTableConfig(tableName);
        streamMetadata = new KafkaStreamMetadata(tableConfig.getIndexingConfig().getStreamConfigs());
        if (streamMetadata.hasSimpleKafkaConsumerType() && !streamMetadata.hasHighLevelKafkaConsumerType()) {
          countHLCSegments = false;
        }
        for (RealtimeSegmentZKMetadata realtimeSegmentZKMetadata : realtimeSegmentZKMetadatas) {
          SegmentMetadata segmentMetadata = new SegmentMetadataImpl(realtimeSegmentZKMetadata);
          segmentMetadataList.add(segmentMetadata);
        }
        // Update the gauge to contain the total document count in the segments
        _validationMetrics.updateTotalDocumentsGauge(tableName,
            computeRealtimeTotalDocumentInSegments(segmentMetadataList, countHLCSegments));
        if (streamMetadata.hasSimpleKafkaConsumerType()) {
          validateLLCSegments(tableName, tableConfig);
        }
      } catch (Exception e) {
        if (tableConfig == null) {
          LOGGER.warn("Cannot get realtime table config for {}", tableName);
        } else if (streamMetadata == null) {
          LOGGER.warn("Cannot get stream config for {}", tableName);
        } else {
          LOGGER.error("Exception while validating table {}", tableName, e);
        }
      }
    } else {
      LOGGER.warn("Ignoring table type {} for table {}", tableType, tableName);
    }
  }
  LOGGER.info("Validation completed");
}
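In Pinot this method runs on a timer inside the controller. A minimal sketch of such periodic invocation (the executor wiring, the validationManager reference, and the 60-minute interval are illustrative assumptions, not Pinot's actual scheduling code):

ScheduledExecutorService scheduler = Executors.newSingleThreadScheduledExecutor();
scheduler.scheduleWithFixedDelay(new Runnable() {
  @Override
  public void run() {
    try {
      validationManager.runValidation(); // skips work internally when this controller is not the leader
    } catch (Exception e) {
      LOGGER.error("Caught exception in scheduled validation run", e);
    }
  }
}, 0L, 60L, TimeUnit.MINUTES);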
Use of org.apache.helix.ZNRecord in project pinot by linkedin.
The class PinotRealtimeSegmentManager, method refreshWatchers.
/**
 * Helper method to perform an idempotent operation to refresh all watches (related to real-time segments):
 * - Data change listener for all existing real-time tables.
 * - Child creation listener for all existing real-time tables.
 * - Data change listener for all existing real-time segments.
 *
 * @param path the ZooKeeper path for which the change notification was received
 */
private void refreshWatchers(String path) {
  LOGGER.info("Received change notification for path: {}", path);
  List<Stat> stats = new ArrayList<>();
  List<ZNRecord> tableConfigs = _pinotHelixResourceManager.getPropertyStore().getChildren(TABLE_CONFIG, stats, 0);
  if (tableConfigs == null) {
    return;
  }
  for (ZNRecord tableConfigZnRecord : tableConfigs) {
    try {
      String znRecordId = tableConfigZnRecord.getId();
      if (TableNameBuilder.getTableTypeFromTableName(znRecordId) == TableType.REALTIME) {
        AbstractTableConfig abstractTableConfig = AbstractTableConfig.fromZnRecord(tableConfigZnRecord);
        KafkaStreamMetadata metadata = new KafkaStreamMetadata(abstractTableConfig.getIndexingConfig().getStreamConfigs());
        if (metadata.hasHighLevelKafkaConsumerType()) {
          String realtimeTable = abstractTableConfig.getTableName();
          String realtimeSegmentsPathForTable = _propertyStorePath + SEGMENTS_PATH + "/" + realtimeTable;
          LOGGER.info("Setting data/child changes watch for real-time table '{}'", realtimeTable);
          _zkClient.subscribeDataChanges(realtimeSegmentsPathForTable, this);
          _zkClient.subscribeChildChanges(realtimeSegmentsPathForTable, this);
          List<String> childNames =
              _pinotHelixResourceManager.getPropertyStore().getChildNames(SEGMENTS_PATH + "/" + realtimeTable, 0);
          if (childNames != null && !childNames.isEmpty()) {
            for (String segmentName : childNames) {
              if (!SegmentName.isHighLevelConsumerSegmentName(segmentName)) {
                continue;
              }
              String segmentPath = realtimeSegmentsPathForTable + "/" + segmentName;
              RealtimeSegmentZKMetadata realtimeSegmentZKMetadata = ZKMetadataProvider.getRealtimeSegmentZKMetadata(
                  _pinotHelixResourceManager.getPropertyStore(), abstractTableConfig.getTableName(), segmentName);
              if (realtimeSegmentZKMetadata == null) {
                // The segment got deleted by the retention manager
                continue;
              }
              if (realtimeSegmentZKMetadata.getStatus() == Status.IN_PROGRESS) {
                LOGGER.info("Setting data change watch for real-time segment currently being consumed: {}", segmentPath);
                _zkClient.subscribeDataChanges(segmentPath, this);
              } else {
                _zkClient.unsubscribeDataChanges(segmentPath, this);
              }
            }
          }
        }
      }
    } catch (Exception e) {
      // Continue setting watches for the other tables on any exception here, so that
      // errors with one table don't impact the others
      LOGGER.error("Caught exception while processing ZNRecord id: {}. Skipping node to continue setting watches",
          tableConfigZnRecord.getId(), e);
    }
  }
}
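The manager subscribes itself as the listener, so it must implement the ZkClient listener interfaces. A sketch of the corresponding callbacks (the bodies shown are illustrative; the actual handlers in PinotRealtimeSegmentManager may do more):

// Assuming the class implements org.I0Itec.zkclient.IZkDataListener and IZkChildListener
@Override
public void handleDataChange(String dataPath, Object data) throws Exception {
  refreshWatchers(dataPath); // re-evaluate watches whenever watched data changes
}

@Override
public void handleDataDeleted(String dataPath) throws Exception {
  refreshWatchers(dataPath);
}

@Override
public void handleChildChange(String parentPath, List<String> currentChilds) throws Exception {
  refreshWatchers(parentPath); // segments were added or removed under a table path
}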
Use of org.apache.helix.ZNRecord in project pinot by linkedin.
The class RetentionManager, method retrieveSegmentMetadataForTable.
private List<SegmentZKMetadata> retrieveSegmentMetadataForTable(String tableName) {
  List<SegmentZKMetadata> segmentMetadataList = new ArrayList<>();
  ZkHelixPropertyStore<ZNRecord> propertyStore = _pinotHelixResourceManager.getPropertyStore();
  TableType tableType = TableNameBuilder.getTableTypeFromTableName(tableName);
  assert tableType != null;
  switch (tableType) {
    case OFFLINE:
      List<OfflineSegmentZKMetadata> offlineSegmentZKMetadatas =
          ZKMetadataProvider.getOfflineSegmentZKMetadataListForTable(propertyStore, tableName);
      for (OfflineSegmentZKMetadata offlineSegmentZKMetadata : offlineSegmentZKMetadatas) {
        segmentMetadataList.add(offlineSegmentZKMetadata);
      }
      break;
    case REALTIME:
      List<RealtimeSegmentZKMetadata> realtimeSegmentZKMetadatas =
          ZKMetadataProvider.getRealtimeSegmentZKMetadataListForTable(propertyStore, tableName);
      for (RealtimeSegmentZKMetadata realtimeSegmentZKMetadata : realtimeSegmentZKMetadatas) {
        segmentMetadataList.add(realtimeSegmentZKMetadata);
      }
      break;
    default:
      throw new IllegalArgumentException("No table type matches table name: " + tableName);
  }
  return segmentMetadataList;
}
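A sketch of how a caller might scan the returned metadata for segments past a retention window (the 30-day window, the table name, and the getEndTime/getTimeUnit/getSegmentName accessors are assumptions for illustration, not RetentionManager's actual logic):

long retentionMs = TimeUnit.DAYS.toMillis(30L); // hypothetical retention window
long nowMs = System.currentTimeMillis();
for (SegmentZKMetadata segmentZKMetadata : retrieveSegmentMetadataForTable("myTable_OFFLINE")) { // illustrative table name
  long endTimeMs = segmentZKMetadata.getTimeUnit().toMillis(segmentZKMetadata.getEndTime()); // assumed accessors
  if (nowMs - endTimeMs > retentionMs) {
    LOGGER.info("Segment {} is past the retention window", segmentZKMetadata.getSegmentName());
  }
}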
Use of org.apache.helix.ZNRecord in project pinot by linkedin.
The class PinotLLCRealtimeSegmentManager, method updateKafkaPartitionsIfNecessary.
/**
 * Updates the Kafka partition assignment as necessary to accommodate changes in the number of replicas, the
 * tenant's instance set, or the number of Kafka partitions. As new segments are assigned, they will obey the new
 * Kafka partition assignment.
 *
 * @param realtimeTableName name of the realtime table
 * @param tableConfig table config from the property store
 */
public void updateKafkaPartitionsIfNecessary(String realtimeTableName, AbstractTableConfig tableConfig) {
  final ZNRecord partitionAssignment = getKafkaPartitionAssignment(realtimeTableName);
  final Map<String, List<String>> partitionToServersMap = partitionAssignment.getListFields();
  final KafkaStreamMetadata kafkaStreamMetadata = new KafkaStreamMetadata(tableConfig.getIndexingConfig().getStreamConfigs());
  final String realtimeServerTenantName =
      ControllerTenantNameBuilder.getRealtimeTenantNameForTenant(tableConfig.getTenantConfig().getServer());
  final List<String> currentInstances = getInstances(realtimeServerTenantName);
  // The previous partition count is what we find in the Kafka partition assignment znode.
  // Get the current partition count from Kafka.
  final int prevPartitionCount = partitionToServersMap.size();
  int currentPartitionCount = -1;
  try {
    currentPartitionCount = getKafkaPartitionCount(kafkaStreamMetadata);
  } catch (Exception e) {
    LOGGER.warn("Could not get partition count for {}. Leaving kafka partition count at {}", realtimeTableName, currentPartitionCount);
    return;
  }
  // The previous instance set is what we find in the Kafka partition assignment znode (values of the map entries)
  final Set<String> prevInstances = new HashSet<>(currentInstances.size());
  for (List<String> servers : partitionToServersMap.values()) {
    prevInstances.addAll(servers);
  }
  final int prevReplicaCount = partitionToServersMap.entrySet().iterator().next().getValue().size();
  final int currentReplicaCount = Integer.valueOf(tableConfig.getValidationConfig().getReplicasPerPartition());
  boolean updateKafkaAssignment = false;
  if (!prevInstances.equals(new HashSet<String>(currentInstances))) {
    LOGGER.info("Detected change in instances for table {}", realtimeTableName);
    updateKafkaAssignment = true;
  }
  if (prevPartitionCount != currentPartitionCount) {
    LOGGER.info("Detected change in Kafka partition count for table {} from {} to {}", realtimeTableName, prevPartitionCount, currentPartitionCount);
    updateKafkaAssignment = true;
  }
  if (prevReplicaCount != currentReplicaCount) {
    LOGGER.info("Detected change in per-partition replica count for table {} from {} to {}", realtimeTableName, prevReplicaCount, currentReplicaCount);
    updateKafkaAssignment = true;
  }
  if (!updateKafkaAssignment) {
    LOGGER.info("Not updating Kafka partition assignment for table {}", realtimeTableName);
    return;
  }
  // Generate a new Kafka partition assignment and update the znode
  if (currentInstances.size() < currentReplicaCount) {
    LOGGER.error("Cannot have {} replicas in {} instances for {}. Not updating partition assignment", currentReplicaCount, currentInstances.size(), realtimeTableName);
    return;
  }
  ZNRecord newPartitionAssignment =
      generatePartitionAssignment(kafkaStreamMetadata.getKafkaTopicName(), currentPartitionCount, currentInstances, currentReplicaCount);
  writeKafkaPartitionAssignemnt(realtimeTableName, newPartitionAssignment);
  LOGGER.info("Successfully updated Kafka partition assignment for table {}", realtimeTableName);
}
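The assignment znode maps each partition id to its replica servers via ZNRecord list fields. A minimal sketch of building such a record with a round-robin layout (generatePartitionAssignment's actual placement logic may differ; the topicName, partitionCount, replicaCount, and instances variables are assumed inputs, and this only illustrates the shape of the data):

ZNRecord assignment = new ZNRecord(topicName); // record id is the Kafka topic name in this sketch
for (int partition = 0; partition < partitionCount; partition++) {
  List<String> replicas = new ArrayList<>(replicaCount);
  for (int replica = 0; replica < replicaCount; replica++) {
    // Spread replicas across the instance list round-robin
    replicas.add(instances.get((partition * replicaCount + replica) % instances.size()));
  }
  assignment.setListField(Integer.toString(partition), replicas);
}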