use of com.linkedin.pinot.common.metadata.stream.KafkaStreamMetadata in project pinot by linkedin.
the class PinotHelixResourceManager method ensureRealtimeClusterIsSetUp.
private void ensureRealtimeClusterIsSetUp(AbstractTableConfig config, String realtimeTableName, IndexingConfig indexingConfig) {
KafkaStreamMetadata kafkaStreamMetadata = new KafkaStreamMetadata(indexingConfig.getStreamConfigs());
IdealState idealState = _helixAdmin.getResourceIdealState(_helixClusterName, realtimeTableName);
if (kafkaStreamMetadata.hasHighLevelKafkaConsumerType()) {
if (kafkaStreamMetadata.hasSimpleKafkaConsumerType()) {
// We may be adding on low-level, or creating both.
if (idealState == null) {
// Need to create both. Create high-level consumer first.
createHelixEntriesForHighLevelConsumer(config, realtimeTableName, idealState);
idealState = _helixAdmin.getResourceIdealState(_helixClusterName, realtimeTableName);
LOGGER.info("Configured new HLC for table {}", realtimeTableName);
}
// Fall through to create low-level consumers
} else {
// Only high-level consumer specified in the config.
createHelixEntriesForHighLevelConsumer(config, realtimeTableName, idealState);
// Clean up any LLC table if they are present
PinotLLCRealtimeSegmentManager.getInstance().cleanupLLC(realtimeTableName);
}
}
// Either we have only low-level consumer, or both.
if (kafkaStreamMetadata.hasSimpleKafkaConsumerType()) {
// Will either create idealstate entry, or update the IS entry with new segments
// (unless there are low-level segments already present)
final String llcKafkaPartitionAssignmentPath = ZKMetadataProvider.constructPropertyStorePathForKafkaPartitions(realtimeTableName);
if (!_propertyStore.exists(llcKafkaPartitionAssignmentPath, AccessOption.PERSISTENT)) {
PinotTableIdealStateBuilder.buildLowLevelRealtimeIdealStateFor(realtimeTableName, config, _helixAdmin, _helixClusterName, idealState);
LOGGER.info("Successfully added Helix entries for low-level consumers for {} ", realtimeTableName);
} else {
LOGGER.info("LLC is already set up for table {}, not configuring again", realtimeTableName);
}
}
}
use of com.linkedin.pinot.common.metadata.stream.KafkaStreamMetadata in project pinot by linkedin.
the class ValidationManager method runValidation.
/**
* Runs a validation pass over the currently loaded tables.
*/
public void runValidation() {
if (!_pinotHelixResourceManager.isLeader()) {
LOGGER.info("Skipping validation, not leader!");
return;
}
LOGGER.info("Starting validation");
// Fetch the list of tables
List<String> allTableNames = _pinotHelixResourceManager.getAllPinotTableNames();
ZkHelixPropertyStore<ZNRecord> propertyStore = _pinotHelixResourceManager.getPropertyStore();
for (String tableName : allTableNames) {
List<SegmentMetadata> segmentMetadataList = new ArrayList<SegmentMetadata>();
TableType tableType = TableNameBuilder.getTableTypeFromTableName(tableName);
AbstractTableConfig tableConfig = null;
_pinotHelixResourceManager.rebuildBrokerResourceFromHelixTags(tableName);
// For each table, fetch the metadata for all its segments
if (tableType.equals(TableType.OFFLINE)) {
validateOfflineSegmentPush(propertyStore, tableName, segmentMetadataList);
} else if (tableType.equals(TableType.REALTIME)) {
LOGGER.info("Starting to validate table {}", tableName);
List<RealtimeSegmentZKMetadata> realtimeSegmentZKMetadatas = ZKMetadataProvider.getRealtimeSegmentZKMetadataListForTable(propertyStore, tableName);
// false if this table has ONLY LLC segments (i.e. fully migrated)
boolean countHLCSegments = true;
KafkaStreamMetadata streamMetadata = null;
try {
tableConfig = _pinotHelixResourceManager.getRealtimeTableConfig(tableName);
streamMetadata = new KafkaStreamMetadata(tableConfig.getIndexingConfig().getStreamConfigs());
if (streamMetadata.hasSimpleKafkaConsumerType() && !streamMetadata.hasHighLevelKafkaConsumerType()) {
countHLCSegments = false;
}
for (RealtimeSegmentZKMetadata realtimeSegmentZKMetadata : realtimeSegmentZKMetadatas) {
SegmentMetadata segmentMetadata = new SegmentMetadataImpl(realtimeSegmentZKMetadata);
segmentMetadataList.add(segmentMetadata);
}
// Update the gauge to contain the total document count in the segments
_validationMetrics.updateTotalDocumentsGauge(tableName, computeRealtimeTotalDocumentInSegments(segmentMetadataList, countHLCSegments));
if (streamMetadata.hasSimpleKafkaConsumerType()) {
validateLLCSegments(tableName, tableConfig);
}
} catch (Exception e) {
if (tableConfig == null) {
LOGGER.warn("Cannot get realtime tableconfig for {}", tableName);
} else if (streamMetadata == null) {
LOGGER.warn("Cannot get streamconfig for {}", tableName);
} else {
LOGGER.error("Exception while validating table {}", tableName, e);
}
}
} else {
LOGGER.warn("Ignoring table type {} for table {}", tableType, tableName);
}
}
LOGGER.info("Validation completed");
}
use of com.linkedin.pinot.common.metadata.stream.KafkaStreamMetadata in project pinot by linkedin.
the class PinotRealtimeSegmentManager method assignRealtimeSegmentsToServerInstancesIfNecessary.
private synchronized void assignRealtimeSegmentsToServerInstancesIfNecessary() throws JSONException, IOException {
// Fetch current ideal state snapshot
Map<String, IdealState> idealStateMap = new HashMap<String, IdealState>();
for (String resource : _pinotHelixResourceManager.getAllRealtimeTables()) {
final String tableName = TableNameBuilder.extractRawTableName(resource);
AbstractTableConfig tableConfig = _pinotHelixResourceManager.getTableConfig(tableName, TableType.REALTIME);
KafkaStreamMetadata metadata = new KafkaStreamMetadata(tableConfig.getIndexingConfig().getStreamConfigs());
if (metadata.hasHighLevelKafkaConsumerType()) {
idealStateMap.put(resource, _pinotHelixResourceManager.getHelixAdmin().getResourceIdealState(_pinotHelixResourceManager.getHelixClusterName(), resource));
} else {
LOGGER.debug("Not considering table {} for realtime segment assignment");
}
}
List<Pair<String, String>> listOfSegmentsToAddToInstances = new ArrayList<Pair<String, String>>();
for (String resource : idealStateMap.keySet()) {
try {
IdealState state = idealStateMap.get(resource);
// Are there any partitions?
if (state.getPartitionSet().size() == 0) {
// No, this is a brand new ideal state, so we will add one new segment to every partition and replica
List<String> instancesInResource = new ArrayList<String>();
try {
instancesInResource.addAll(_pinotHelixResourceManager.getServerInstancesForTable(resource, TableType.REALTIME));
} catch (Exception e) {
LOGGER.error("Caught exception while fetching instances for resource {}", resource, e);
_controllerMetrics.addMeteredGlobalValue(ControllerMeter.CONTROLLER_REALTIME_TABLE_SEGMENT_ASSIGNMENT_ERROR, 1L);
}
// Assign a new segment to all server instances
for (String instanceId : instancesInResource) {
InstanceZKMetadata instanceZKMetadata = _pinotHelixResourceManager.getInstanceZKMetadata(instanceId);
if (instanceZKMetadata == null) {
LOGGER.warn("Instance {} has no associated instance metadata in ZK, ignoring for segment assignment.", instanceId);
_controllerMetrics.addMeteredGlobalValue(ControllerMeter.CONTROLLER_REALTIME_TABLE_SEGMENT_ASSIGNMENT_ERROR, 1L);
continue;
}
String groupId = instanceZKMetadata.getGroupId(resource);
String partitionId = instanceZKMetadata.getPartition(resource);
if (groupId != null && !groupId.isEmpty() && partitionId != null && !partitionId.isEmpty()) {
listOfSegmentsToAddToInstances.add(new Pair<String, String>(new HLCSegmentName(groupId, partitionId, String.valueOf(System.currentTimeMillis())).getSegmentName(), instanceId));
} else {
LOGGER.warn("Instance {} has invalid groupId ({}) and/or partitionId ({}) for resource {}, ignoring for segment assignment.", instanceId, groupId, partitionId, resource);
_controllerMetrics.addMeteredGlobalValue(ControllerMeter.CONTROLLER_REALTIME_TABLE_SEGMENT_ASSIGNMENT_ERROR, 1L);
}
}
} else {
// Add all server instances to the list of instances for which to assign a realtime segment
Set<String> instancesToAssignRealtimeSegment = new HashSet<String>();
try {
instancesToAssignRealtimeSegment.addAll(_pinotHelixResourceManager.getServerInstancesForTable(resource, TableType.REALTIME));
} catch (Exception e) {
LOGGER.error("Caught exception while fetching instances for resource {}", resource, e);
_controllerMetrics.addMeteredGlobalValue(ControllerMeter.CONTROLLER_REALTIME_TABLE_SEGMENT_ASSIGNMENT_ERROR, 1L);
}
// Remove server instances that are currently processing a segment
for (String partition : state.getPartitionSet()) {
// Helix partition is the segment name
if (SegmentName.isHighLevelConsumerSegmentName(partition)) {
HLCSegmentName segName = new HLCSegmentName(partition);
RealtimeSegmentZKMetadata realtimeSegmentZKMetadata = ZKMetadataProvider.getRealtimeSegmentZKMetadata(_pinotHelixResourceManager.getPropertyStore(), segName.getTableName(), partition);
if (realtimeSegmentZKMetadata == null) {
// Segment was deleted by retention manager.
continue;
}
if (realtimeSegmentZKMetadata.getStatus() == Status.IN_PROGRESS) {
instancesToAssignRealtimeSegment.removeAll(state.getInstanceSet(partition));
}
}
}
// Assign a new segment to the server instances not currently processing this segment
for (String instanceId : instancesToAssignRealtimeSegment) {
InstanceZKMetadata instanceZKMetadata = _pinotHelixResourceManager.getInstanceZKMetadata(instanceId);
String groupId = instanceZKMetadata.getGroupId(resource);
String partitionId = instanceZKMetadata.getPartition(resource);
listOfSegmentsToAddToInstances.add(new Pair<String, String>(new HLCSegmentName(groupId, partitionId, String.valueOf(System.currentTimeMillis())).getSegmentName(), instanceId));
}
}
} catch (Exception e) {
LOGGER.warn("Caught exception while processing resource {}, skipping.", resource, e);
_controllerMetrics.addMeteredGlobalValue(ControllerMeter.CONTROLLER_REALTIME_TABLE_SEGMENT_ASSIGNMENT_ERROR, 1L);
}
}
LOGGER.info("Computed list of new segments to add : " + Arrays.toString(listOfSegmentsToAddToInstances.toArray()));
// Add the new segments to the server instances
for (final Pair<String, String> segmentIdAndInstanceId : listOfSegmentsToAddToInstances) {
final String segmentId = segmentIdAndInstanceId.getFirst();
final String instanceName = segmentIdAndInstanceId.getSecond();
try {
final HLCSegmentName segName = new HLCSegmentName(segmentId);
String resourceName = segName.getTableName();
// Does the ideal state already contain this segment?
if (!idealStateMap.get(resourceName).getPartitionSet().contains(segmentId)) {
// No, add it
// Create the realtime segment metadata
RealtimeSegmentZKMetadata realtimeSegmentMetadataToAdd = new RealtimeSegmentZKMetadata();
realtimeSegmentMetadataToAdd.setTableName(TableNameBuilder.extractRawTableName(resourceName));
realtimeSegmentMetadataToAdd.setSegmentType(SegmentType.REALTIME);
realtimeSegmentMetadataToAdd.setStatus(Status.IN_PROGRESS);
realtimeSegmentMetadataToAdd.setSegmentName(segmentId);
// Add the new metadata to the property store
ZKMetadataProvider.setRealtimeSegmentZKMetadata(_pinotHelixResourceManager.getPropertyStore(), realtimeSegmentMetadataToAdd);
// Update the ideal state to add the new realtime segment
HelixHelper.updateIdealState(_pinotHelixResourceManager.getHelixZkManager(), resourceName, new Function<IdealState, IdealState>() {
@Override
public IdealState apply(IdealState idealState) {
return PinotTableIdealStateBuilder.addNewRealtimeSegmentToIdealState(segmentId, idealState, instanceName);
}
}, RetryPolicies.exponentialBackoffRetryPolicy(5, 500L, 2.0f));
}
} catch (Exception e) {
LOGGER.warn("Caught exception while processing segment {} for instance {}, skipping.", segmentId, instanceName, e);
_controllerMetrics.addMeteredGlobalValue(ControllerMeter.CONTROLLER_REALTIME_TABLE_SEGMENT_ASSIGNMENT_ERROR, 1L);
}
}
}
use of com.linkedin.pinot.common.metadata.stream.KafkaStreamMetadata in project pinot by linkedin.
the class PinotRealtimeSegmentManager method refreshWatchers.
/**
* Helper method to perform idempotent operation to refresh all watches (related to real-time segments):
* - Data change listener for all existing real-time tables.
* - Child creation listener for all existing real-time tables.
* - Data change listener for all existing real-time segments
*
* @param path
*/
private void refreshWatchers(String path) {
LOGGER.info("Received change notification for path: {}", path);
List<Stat> stats = new ArrayList<>();
List<ZNRecord> tableConfigs = _pinotHelixResourceManager.getPropertyStore().getChildren(TABLE_CONFIG, stats, 0);
if (tableConfigs == null) {
return;
}
for (ZNRecord tableConfigZnRecord : tableConfigs) {
try {
String znRecordId = tableConfigZnRecord.getId();
if (TableNameBuilder.getTableTypeFromTableName(znRecordId) == TableType.REALTIME) {
AbstractTableConfig abstractTableConfig = AbstractTableConfig.fromZnRecord(tableConfigZnRecord);
KafkaStreamMetadata metadata = new KafkaStreamMetadata(abstractTableConfig.getIndexingConfig().getStreamConfigs());
if (metadata.hasHighLevelKafkaConsumerType()) {
String realtimeTable = abstractTableConfig.getTableName();
String realtimeSegmentsPathForTable = _propertyStorePath + SEGMENTS_PATH + "/" + realtimeTable;
LOGGER.info("Setting data/child changes watch for real-time table '{}'", realtimeTable);
_zkClient.subscribeDataChanges(realtimeSegmentsPathForTable, this);
_zkClient.subscribeChildChanges(realtimeSegmentsPathForTable, this);
List<String> childNames = _pinotHelixResourceManager.getPropertyStore().getChildNames(SEGMENTS_PATH + "/" + realtimeTable, 0);
if (childNames != null && !childNames.isEmpty()) {
for (String segmentName : childNames) {
if (!SegmentName.isHighLevelConsumerSegmentName(segmentName)) {
continue;
}
String segmentPath = realtimeSegmentsPathForTable + "/" + segmentName;
RealtimeSegmentZKMetadata realtimeSegmentZKMetadata = ZKMetadataProvider.getRealtimeSegmentZKMetadata(_pinotHelixResourceManager.getPropertyStore(), abstractTableConfig.getTableName(), segmentName);
if (realtimeSegmentZKMetadata == null) {
// The segment got deleted by retention manager
continue;
}
if (realtimeSegmentZKMetadata.getStatus() == Status.IN_PROGRESS) {
LOGGER.info("Setting data change watch for real-time segment currently being consumed: {}", segmentPath);
_zkClient.subscribeDataChanges(segmentPath, this);
} else {
_zkClient.unsubscribeDataChanges(segmentPath, this);
}
}
}
}
}
} catch (Exception e) {
// we want to continue setting watches for other tables for any kind of exception here so that
// errors with one table don't impact others
LOGGER.error("Caught exception while processing ZNRecord id: {}. Skipping node to continue setting watches", tableConfigZnRecord.getId(), e);
}
}
}
use of com.linkedin.pinot.common.metadata.stream.KafkaStreamMetadata in project pinot by linkedin.
the class PinotLLCRealtimeSegmentManager method updateKafkaPartitionsIfNecessary.
/**
* Update the kafka partitions as necessary to accommodate changes in number of replicas, number of tenants or
* number of kafka partitions. As new segments are assigned, they will obey the new kafka partition assignment.
*
* @param realtimeTableName name of the realtime table
* @param tableConfig tableConfig from propertystore
*/
public void updateKafkaPartitionsIfNecessary(String realtimeTableName, AbstractTableConfig tableConfig) {
final ZNRecord partitionAssignment = getKafkaPartitionAssignment(realtimeTableName);
final Map<String, List<String>> partitionToServersMap = partitionAssignment.getListFields();
final KafkaStreamMetadata kafkaStreamMetadata = new KafkaStreamMetadata(tableConfig.getIndexingConfig().getStreamConfigs());
final String realtimeServerTenantName = ControllerTenantNameBuilder.getRealtimeTenantNameForTenant(tableConfig.getTenantConfig().getServer());
final List<String> currentInstances = getInstances(realtimeServerTenantName);
// Previous partition count is what we find in the Kafka partition assignment znode.
// Get the current partition count from Kafka.
final int prevPartitionCount = partitionToServersMap.size();
int currentPartitionCount = -1;
try {
currentPartitionCount = getKafkaPartitionCount(kafkaStreamMetadata);
} catch (Exception e) {
LOGGER.warn("Could not get partition count for {}. Leaving kafka partition count at {}", realtimeTableName, currentPartitionCount);
return;
}
// Previous instance set is what we find in the Kafka partition assignment znode (values of the map entries)
final Set<String> prevInstances = new HashSet<>(currentInstances.size());
for (List<String> servers : partitionToServersMap.values()) {
prevInstances.addAll(servers);
}
final int prevReplicaCount = partitionToServersMap.entrySet().iterator().next().getValue().size();
final int currentReplicaCount = Integer.valueOf(tableConfig.getValidationConfig().getReplicasPerPartition());
boolean updateKafkaAssignment = false;
if (!prevInstances.equals(new HashSet<String>(currentInstances))) {
LOGGER.info("Detected change in instances for table {}", realtimeTableName);
updateKafkaAssignment = true;
}
if (prevPartitionCount != currentPartitionCount) {
LOGGER.info("Detected change in Kafka partition count for table {} from {} to {}", realtimeTableName, prevPartitionCount, currentPartitionCount);
updateKafkaAssignment = true;
}
if (prevReplicaCount != currentReplicaCount) {
LOGGER.info("Detected change in per-partition replica count for table {} from {} to {}", realtimeTableName, prevReplicaCount, currentReplicaCount);
updateKafkaAssignment = true;
}
if (!updateKafkaAssignment) {
LOGGER.info("Not updating Kafka partition assignment for table {}", realtimeTableName);
return;
}
// Generate new kafka partition assignment and update the znode
if (currentInstances.size() < currentReplicaCount) {
LOGGER.error("Cannot have {} replicas in {} instances for {}.Not updating partition assignment", currentReplicaCount, currentInstances.size(), realtimeTableName);
return;
}
ZNRecord newPartitionAssignment = generatePartitionAssignment(kafkaStreamMetadata.getKafkaTopicName(), currentPartitionCount, currentInstances, currentReplicaCount);
writeKafkaPartitionAssignemnt(realtimeTableName, newPartitionAssignment);
LOGGER.info("Successfully updated Kafka partition assignment for table {}", realtimeTableName);
}
Aggregations