use of org.apache.helix.model.IdealState in project pinot by linkedin.
the class HelixBrokerStarterTest method testResourceAndTagAssignment.
/**
 * Checks broker tagging, the broker resource ideal state / external view and the broker routing table,
 * first for the dining table alone, then after adding the "coffee" table, and finally after adding one
 * more segment to the dining table.
 */
@Test
public void testResourceAndTagAssignment() throws Exception {
  final String brokerTenantTag = "DefaultTenant_BROKER";
  final String brokerResource = CommonConstants.Helix.BROKER_RESOURCE_INSTANCE;
  final long timeoutMillis = 30000L;

  // Starting point: six instances carry the broker tenant tag and the dining table is in the broker resource.
  Assert.assertEquals(_helixAdmin.getInstancesInClusterWithTag(HELIX_CLUSTER_NAME, brokerTenantTag).size(), 6);
  IdealState brokerIdealState = _helixAdmin.getResourceIdealState(HELIX_CLUSTER_NAME, brokerResource);
  Assert.assertEquals(brokerIdealState.getInstanceSet(DINING_TABLE_NAME).size(), SEGMENT_COUNT);
  ExternalView currentView = _helixAdmin.getResourceExternalView(HELIX_CLUSTER_NAME, brokerResource);
  Assert.assertEquals(currentView.getStateMap(DINING_TABLE_NAME).size(), SEGMENT_COUNT);

  // The routing table is a private field of the routing object, so it is read reflectively.
  HelixExternalViewBasedRouting routing = _helixBrokerStarter.getHelixExternalViewBasedRouting();
  Field routingTableField = HelixExternalViewBasedRouting.class.getDeclaredField("_brokerRoutingTable");
  routingTableField.setAccessible(true);
  final Map<String, List<ServerToSegmentSetMap>> brokerRoutingTable =
      (Map<String, List<ServerToSegmentSetMap>>) routingTableField.get(routing);

  // Wait up to 30s for the routing table to hold exactly one table
  Callable<Boolean> routingTableHasOneTable = new Callable<Boolean>() {
    @Override
    public Boolean call() throws Exception {
      return brokerRoutingTable.size() == 1;
    }
  };
  waitForPredicate(routingTableHasOneTable, timeoutMillis);
  Assert.assertEquals(Arrays.toString(brokerRoutingTable.keySet().toArray()), "[dining_OFFLINE]");

  // Create a second offline table.
  final String tableName = "coffee";
  JSONObject createTableJson =
      ControllerRequestBuilderUtil.buildCreateOfflineTableJSON(tableName, "testServer", "testBroker", 1);
  AbstractTableConfig coffeeTableConfig = AbstractTableConfig.init(createTableJson.toString());
  _pinotResourceManager.addTable(coffeeTableConfig);

  Assert.assertEquals(_helixAdmin.getInstancesInClusterWithTag(HELIX_CLUSTER_NAME, brokerTenantTag).size(), 6);
  brokerIdealState = _helixAdmin.getResourceIdealState(HELIX_CLUSTER_NAME, brokerResource);
  Assert.assertEquals(brokerIdealState.getInstanceSet(COFFEE_TABLE_NAME).size(), SEGMENT_COUNT);
  Assert.assertEquals(brokerIdealState.getInstanceSet(DINING_TABLE_NAME).size(), SEGMENT_COUNT);

  // Wait up to 30s for the broker external view to list the expected number of instances for the new table
  Callable<Boolean> coffeeVisibleInExternalView = new Callable<Boolean>() {
    @Override
    public Boolean call() throws Exception {
      ExternalView polledView = _helixAdmin.getResourceExternalView(HELIX_CLUSTER_NAME, brokerResource);
      return polledView.getStateMap(COFFEE_TABLE_NAME).size() == SEGMENT_COUNT;
    }
  };
  waitForPredicate(coffeeVisibleInExternalView, timeoutMillis);
  currentView = _helixAdmin.getResourceExternalView(HELIX_CLUSTER_NAME, brokerResource);
  Assert.assertEquals(currentView.getStateMap(COFFEE_TABLE_NAME).size(), SEGMENT_COUNT);

  // Wait up to 30s for the routing table to hold both tables
  Callable<Boolean> routingTableHasTwoTables = new Callable<Boolean>() {
    @Override
    public Boolean call() throws Exception {
      return brokerRoutingTable.size() == 2;
    }
  };
  waitForPredicate(routingTableHasTwoTables, timeoutMillis);
  Object[] sortedTables = brokerRoutingTable.keySet().toArray();
  Arrays.sort(sortedTables);
  Assert.assertEquals(Arrays.toString(sortedTables), "[coffee_OFFLINE, dining_OFFLINE]");

  // Before the extra segment is added, the first server of the dining table serves five segments.
  ServerToSegmentSetMap diningRouting = brokerRoutingTable.get(DINING_TABLE_NAME).get(0);
  Set<String> diningServers = diningRouting.getServerSet();
  String firstDiningServer = diningServers.iterator().next();
  Assert.assertEquals(brokerRoutingTable.get(DINING_TABLE_NAME).get(0).getSegmentSet(firstDiningServer).size(), 5);

  addOneSegment(DINING_TABLE_NAME);

  // Wait up to 30s for the dining table external view to reach the expected number of partitions
  Callable<Boolean> diningExternalViewComplete = new Callable<Boolean>() {
    @Override
    public Boolean call() throws Exception {
      ExternalView polledView = _helixAdmin.getResourceExternalView(HELIX_CLUSTER_NAME, DINING_TABLE_NAME);
      return polledView.getPartitionSet().size() == SEGMENT_COUNT;
    }
  };
  waitForPredicate(diningExternalViewComplete, timeoutMillis);
  currentView = _helixAdmin.getResourceExternalView(HELIX_CLUSTER_NAME, DINING_TABLE_NAME);
  Assert.assertEquals(currentView.getPartitionSet().size(), SEGMENT_COUNT);

  // Adding a segment must not change which tables are routed.
  sortedTables = brokerRoutingTable.keySet().toArray();
  Arrays.sort(sortedTables);
  Assert.assertEquals(Arrays.toString(sortedTables), "[coffee_OFFLINE, dining_OFFLINE]");

  // Wait up to 30s for the routing table to pick up the new segment
  Callable<Boolean> diningRoutingComplete = new Callable<Boolean>() {
    @Override
    public Boolean call() throws Exception {
      ServerToSegmentSetMap polledRouting = brokerRoutingTable.get(DINING_TABLE_NAME).get(0);
      String polledServer = polledRouting.getServerSet().iterator().next();
      return polledRouting.getSegmentSet(polledServer).size() == SEGMENT_COUNT;
    }
  };
  waitForPredicate(diningRoutingComplete, timeoutMillis);
  diningServers = brokerRoutingTable.get(DINING_TABLE_NAME).get(0).getServerSet();
  firstDiningServer = diningServers.iterator().next();
  Assert.assertEquals(brokerRoutingTable.get(DINING_TABLE_NAME).get(0).getSegmentSet(firstDiningServer).size(), SEGMENT_COUNT);
}
use of org.apache.helix.model.IdealState in project pinot by linkedin.
the class HelixHelper method updateIdealState.
/**
 * Updates the ideal state, retrying if necessary in case of concurrent updates to the ideal state.
 *
 * <p>Each attempt reads the current ideal state, passes a serialized-and-deserialized copy of it to
 * {@code updater}, and, if the result is non-null and differs from what was read, writes it back passing
 * the version of the record that was read. An attempt that fails (missing ideal state, updater exception,
 * write exception or unsuccessful write) reports {@code false} to the retry policy.
 *
 * @param helixManager The HelixManager used to interact with the Helix cluster
 * @param resourceName The resource for which to update the ideal state
 * @param updater A function that returns an updated ideal state given an input ideal state
 * @param policy The retry policy that drives the update attempts
 * @throws RuntimeException if the retry policy reports that the update did not succeed
 */
public static void updateIdealState(final HelixManager helixManager, final String resourceName, final Function<IdealState, IdealState> updater, RetryPolicy policy) {
  boolean successful = policy.attempt(new Callable<Boolean>() {
    @Override
    public Boolean call() {
      HelixDataAccessor dataAccessor = helixManager.getHelixDataAccessor();
      PropertyKey propertyKey = dataAccessor.keyBuilder().idealStates(resourceName);
      IdealState idealState = dataAccessor.getProperty(propertyKey);
      // Without this guard a missing ideal state would fail with a NullPointerException on getRecord() below.
      if (idealState == null) {
        LOGGER.warn("Ideal state for resource {} does not exist, retrying.", resourceName);
        return false;
      }
      // Make a copy of the idealState above to pass it to the updater, instead of querying again,
      // as the state may change between the queries.
      ZNRecordSerializer znRecordSerializer = new ZNRecordSerializer();
      IdealState idealStateCopy = new IdealState((ZNRecord) znRecordSerializer.deserialize(znRecordSerializer.serialize(idealState.getRecord())));
      IdealState updatedIdealState;
      try {
        updatedIdealState = updater.apply(idealStateCopy);
      } catch (Exception e) {
        LOGGER.error("Caught exception while updating ideal state", e);
        return false;
      }
      // Nothing to write: the updater returned null or made no change
      if (updatedIdealState == null || EqualityUtils.isEqual(idealState, updatedIdealState)) {
        LOGGER.warn("Idempotent or null ideal state update for resource {}, skipping update.", resourceName);
        return true;
      }
      // If the ideal state is large enough, enable compression
      if (MAX_PARTITION_COUNT_IN_UNCOMPRESSED_IDEAL_STATE < updatedIdealState.getPartitionSet().size()) {
        updatedIdealState.getRecord().setBooleanField("enableCompression", true);
      }
      BaseDataAccessor<ZNRecord> baseDataAccessor = dataAccessor.getBaseDataAccessor();
      boolean success;
      try {
        // The version of the record read above is passed along with the write
        success = baseDataAccessor.set(propertyKey.getPath(), updatedIdealState.getRecord(), idealState.getRecord().getVersion(), AccessOption.PERSISTENT);
      } catch (Exception e) {
        boolean idealStateIsCompressed = updatedIdealState.getRecord().getBooleanField("enableCompression", false);
        LOGGER.warn("Caught exception while updating ideal state for resource {} (compressed={}), retrying.", resourceName, idealStateIsCompressed, e);
        return false;
      }
      if (!success) {
        LOGGER.warn("Failed to update ideal state for resource {}, retrying.", resourceName);
      }
      return success;
    }
  });
  if (!successful) {
    throw new RuntimeException("Failed to update ideal state for resource " + resourceName);
  }
}
use of org.apache.helix.model.IdealState in project pinot by linkedin.
the class SegmentStatusChecker method runSegmentMetrics.
/**
 * Runs a segment status pass over the currently loaded tables.
 *
 * <p>When this controller is not the leader, the status is reset to its default, the checker is stopped and
 * nothing else happens. Otherwise, for every table, the ideal state is compared with the external view and the
 * per-table gauges (number / percentage of replicas, segments in error state, percentage of segments
 * available, ideal state size) are updated, followed by the global realtime / offline table counts.
 */
public void runSegmentMetrics() {
  if (!_pinotHelixResourceManager.isLeader()) {
    LOGGER.info("Skipping Segment Status check, not leader!");
    setStatusToDefault();
    stop();
    return;
  }
  long startTime = System.nanoTime();
  LOGGER.info("Starting Segment Status check for metrics");
  // Fetch the list of tables
  List<String> allTableNames = _pinotHelixResourceManager.getAllPinotTableNames();
  String helixClusterName = _pinotHelixResourceManager.getHelixClusterName();
  HelixAdmin helixAdmin = _pinotHelixResourceManager.getHelixAdmin();
  int realTimeTableCount = 0;
  int offlineTableCount = 0;
  ZkHelixPropertyStore<ZNRecord> propertyStore = _pinotHelixResourceManager.getPropertyStore();
  // Multiply with a long literal so the seconds-to-millis conversion cannot overflow int arithmetic
  long pushGraceMillis = _waitForPushTimeSeconds * 1000L;
  for (String tableName : allTableNames) {
    // Resolved once per table; constant-first equals keeps a null table type from throwing
    TableType tableType = TableNameBuilder.getTableTypeFromTableName(tableName);
    boolean isOfflineTable = TableType.OFFLINE.equals(tableType);
    if (isOfflineTable) {
      offlineTableCount++;
    } else {
      // Anything that is not OFFLINE is counted as realtime
      realTimeTableCount++;
    }
    IdealState idealState = helixAdmin.getResourceIdealState(helixClusterName, tableName);
    if ((idealState == null) || (idealState.getPartitionSet().isEmpty())) {
      _metricsRegistry.setValueOfTableGauge(tableName, ControllerGauge.NUMBER_OF_REPLICAS, 1);
      _metricsRegistry.setValueOfTableGauge(tableName, ControllerGauge.PERCENT_OF_REPLICAS, 100);
      _metricsRegistry.setValueOfTableGauge(tableName, ControllerGauge.PERCENT_SEGMENTS_AVAILABLE, 100);
      continue;
    }
    _metricsRegistry.setValueOfTableGauge(tableName, ControllerGauge.IDEALSTATE_ZNODE_SIZE, idealState.toString().length());
    ExternalView externalView = helixAdmin.getResourceExternalView(helixClusterName, tableName);
    // Keeps track of maximum number of replicas in ideal state
    int nReplicasIdealMax = 0;
    // Keeps track of minimum number of replicas in external view (-1 until a segment has been evaluated)
    int nReplicasExternal = -1;
    // Keeps track of number of segment replicas in error state
    int nErrors = 0;
    // Keeps track of number of segments with no online replicas
    int nOffline = 0;
    // Counts number of segments
    int nSegments = 0;
    for (String partitionName : idealState.getPartitionSet()) {
      int nReplicas = 0;
      nSegments++;
      // Skip segments not online in ideal state
      Map<String, String> idealInstanceStates = idealState.getInstanceStateMap(partitionName);
      if (!idealInstanceStates.containsValue(ONLINE)) {
        // No online segments in ideal state
        continue;
      }
      nReplicasIdealMax = Math.max(nReplicasIdealMax, idealInstanceStates.size());
      Map<String, String> externalInstanceStates = (externalView == null) ? null : externalView.getStateMap(partitionName);
      if (externalInstanceStates == null) {
        // No replicas for this segment
        if (isOfflineTable) {
          OfflineSegmentZKMetadata segmentZKMetadata = ZKMetadataProvider.getOfflineSegmentZKMetadata(propertyStore, tableName, partitionName);
          if (segmentZKMetadata != null && segmentZKMetadata.getPushTime() > System.currentTimeMillis() - pushGraceMillis) {
            // push not yet finished, skip
            continue;
          }
        }
        nOffline++;
        if (nOffline < MaxOfflineSegmentsToLog) {
          LOGGER.warn("Segment {} of table {} has no replicas", partitionName, tableName);
        }
        nReplicasExternal = 0;
        continue;
      }
      for (String state : externalInstanceStates.values()) {
        // Count number of online replicas
        if (state.equals(ONLINE)) {
          nReplicas++;
        }
        if (state.equals(ERROR)) {
          nErrors++;
        }
      }
      if (nReplicas == 0) {
        if (nOffline < MaxOfflineSegmentsToLog) {
          LOGGER.warn("Segment {} of table {} has no online replicas", partitionName, tableName);
        }
        nOffline++;
      }
      nReplicasExternal = (nReplicasExternal == -1) ? nReplicas : Math.min(nReplicasExternal, nReplicas);
    }
    if (nReplicasExternal == -1) {
      // No segment was evaluated against the external view
      nReplicasExternal = (nReplicasIdealMax == 0) ? 1 : 0;
    }
    // Synchronization provided by Controller Gauge to make sure that only one thread updates the gauge
    _metricsRegistry.setValueOfTableGauge(tableName, ControllerGauge.NUMBER_OF_REPLICAS, nReplicasExternal);
    _metricsRegistry.setValueOfTableGauge(tableName, ControllerGauge.PERCENT_OF_REPLICAS, (nReplicasIdealMax > 0) ? (nReplicasExternal * 100 / nReplicasIdealMax) : 100);
    _metricsRegistry.setValueOfTableGauge(tableName, ControllerGauge.SEGMENTS_IN_ERROR_STATE, nErrors);
    _metricsRegistry.setValueOfTableGauge(tableName, ControllerGauge.PERCENT_SEGMENTS_AVAILABLE, (nSegments > 0) ? (100 - (nOffline * 100 / nSegments)) : 100);
    if (nOffline > 0) {
      LOGGER.warn("Table {} has {} segments with no online replicas", tableName, nOffline);
    }
    if (nReplicasExternal < nReplicasIdealMax) {
      LOGGER.warn("Table {} has {} replicas, below replication threshold :{}", tableName, nReplicasExternal, nReplicasIdealMax);
    }
  }
  _metricsRegistry.setValueOfGlobalGauge(ControllerGauge.REALTIME_TABLE_COUNT, realTimeTableCount);
  _metricsRegistry.setValueOfGlobalGauge(ControllerGauge.OFFLINE_TABLE_COUNT, offlineTableCount);
  long totalNanos = System.nanoTime() - startTime;
  LOGGER.info("Segment status metrics completed in {}ms", TimeUnit.MILLISECONDS.convert(totalNanos, TimeUnit.NANOSECONDS));
}
use of org.apache.helix.model.IdealState in project pinot by linkedin.
the class PinotHelixResourceManager method rebuildBrokerResourceFromHelixTags.
/**
 * Rebuilds the broker resource entry of a table from the instances currently tagged with the table's broker
 * tenant.
 *
 * <p>The broker tag is read from the table config. If the instances already listed for the table in the broker
 * ideal state equal the tagged instances, nothing is changed and an unsuccessful response is returned.
 * Otherwise the table's instance state map is cleared and every tagged instance is set to ONLINE.
 *
 * @param tableName Table name; its type is derived from the name via {@code TableNameBuilder}
 * @return A response describing the outcome; unsuccessful when the table has no type, does not exist, the
 *         broker tag cannot be fetched, the ideal state is already up to date, or the update fails
 */
public PinotResourceManagerResponse rebuildBrokerResourceFromHelixTags(final String tableName) {
  // Get the broker tag for this table
  String brokerTag;
  try {
    final TableType tableType = TableNameBuilder.getTableTypeFromTableName(tableName);
    AbstractTableConfig tableConfig;
    if (tableType == TableType.OFFLINE) {
      tableConfig = ZKMetadataProvider.getOfflineTableConfig(getPropertyStore(), tableName);
    } else if (tableType == TableType.REALTIME) {
      tableConfig = ZKMetadataProvider.getRealtimeTableConfig(getPropertyStore(), tableName);
    } else {
      return new PinotResourceManagerResponse("Table " + tableName + " does not have a table type", false);
    }
    if (tableConfig == null) {
      return new PinotResourceManagerResponse("Table " + tableName + " does not exist", false);
    }
    // Read inside the try so a missing tenant config is reported as a failure response instead of escaping
    brokerTag = tableConfig.getTenantConfig().getBroker();
  } catch (Exception e) {
    LOGGER.warn("Caught exception while getting tenant config for table {}", tableName, e);
    return new PinotResourceManagerResponse("Failed to fetch broker tag for table " + tableName + " due to exception: " + e.getMessage(), false);
  }
  // Look for all instances tagged with this broker tag
  final Set<String> brokerInstances = getAllInstancesForBrokerTenant(brokerTag);
  // If we add a new broker, we want to rebuild the broker resource.
  HelixAdmin helixAdmin = getHelixAdmin();
  String clusterName = getHelixClusterName();
  IdealState brokerIdealState = HelixHelper.getBrokerIdealStates(helixAdmin, clusterName);
  Set<String> idealStateBrokerInstances = brokerIdealState.getInstanceSet(tableName);
  if (idealStateBrokerInstances.equals(brokerInstances)) {
    return new PinotResourceManagerResponse("Broker resource is not rebuilt because ideal state is the same for table " + tableName, false);
  }
  // Reset ideal state with the instance list
  try {
    HelixHelper.updateIdealState(getHelixZkManager(), CommonConstants.Helix.BROKER_RESOURCE_INSTANCE, new Function<IdealState, IdealState>() {
      @Nullable
      @Override
      public IdealState apply(@Nullable IdealState idealState) {
        // Drop whatever is currently assigned to the table, then mark every tagged broker ONLINE
        Map<String, String> instanceStateMap = idealState.getInstanceStateMap(tableName);
        if (instanceStateMap != null) {
          instanceStateMap.clear();
        }
        for (String brokerInstance : brokerInstances) {
          idealState.setPartitionState(tableName, brokerInstance, BrokerOnlineOfflineStateModel.ONLINE);
        }
        return idealState;
      }
    }, DEFAULT_RETRY_POLICY);
    LOGGER.info("Successfully rebuilt brokerResource for table {}", tableName);
    return new PinotResourceManagerResponse("Rebuilt brokerResource for table " + tableName, true);
  } catch (Exception e) {
    // The exception must be the last argument so the logger records its stack trace
    LOGGER.warn("Caught exception while rebuilding broker resource from Helix tags for table {}", tableName, e);
    return new PinotResourceManagerResponse("Failed to rebuild brokerResource for table " + tableName + " due to exception: " + e.getMessage(), false);
  }
}
use of org.apache.helix.model.IdealState in project pinot by linkedin.
the class PinotHelixResourceManager method addNewOfflineSegment.
/**
 * Adds the given offline segment to the ideal state of its offline table.
 *
 * <ul>
 *   <li>Reads the segment name and the table name from the segment metadata.</li>
 *   <li>Looks up the segment assignment strategy for the table, creating and caching it from the table config
 *       on first use.</li>
 *   <li>Hands {@code HelixHelper.addSegmentToIdealState} a callable that yields the target instances: the
 *       instances already listed for the segment in the ideal state if there are any, otherwise the instances
 *       chosen by the assignment strategy for the configured replication and server tenant.</li>
 * </ul>
 *
 * @param segmentMetadata Meta-data for the segment, used to access segmentName and tableName.
 * @throws JsonParseException declared by the signature; not thrown directly in this method
 * @throws JsonMappingException declared by the signature; not thrown directly in this method
 * @throws JsonProcessingException declared by the signature; not thrown directly in this method
 * @throws JSONException declared by the signature; not thrown directly in this method
 * @throws IOException declared by the signature; not thrown directly in this method
 */
private void addNewOfflineSegment(final SegmentMetadata segmentMetadata) throws JsonParseException, JsonMappingException, JsonProcessingException, JSONException, IOException {
  final AbstractTableConfig offlineTableConfig =
      ZKMetadataProvider.getOfflineTableConfig(_propertyStore, segmentMetadata.getTableName());
  final String segmentName = segmentMetadata.getName();
  final String offlineTableName =
      TableNameBuilder.OFFLINE_TABLE_NAME_BUILDER.forTable(segmentMetadata.getTableName());

  // Create and cache the assignment strategy the first time this table is seen.
  boolean strategyCached = SEGMENT_ASSIGNMENT_STRATEGY_MAP.containsKey(offlineTableName);
  if (!strategyCached) {
    SEGMENT_ASSIGNMENT_STRATEGY_MAP.put(offlineTableName,
        SegmentAssignmentStrategyFactory.getSegmentAssignmentStrategy(
            offlineTableConfig.getValidationConfig().getSegmentAssignmentStrategy()));
  }
  final SegmentAssignmentStrategy segmentAssignmentStrategy = SEGMENT_ASSIGNMENT_STRATEGY_MAP.get(offlineTableName);

  // The instance selection is passed as a callable so that HelixHelper (pinot-common) does not have to
  // depend upon pinot-controller.
  Callable<List<String>> getInstancesForSegment = new Callable<List<String>>() {
    @Override
    public List<String> call() throws Exception {
      final IdealState latestIdealState = _helixAdmin.getResourceIdealState(_helixClusterName, offlineTableName);
      final Set<String> assignedInstances = latestIdealState.getInstanceSet(segmentName);
      if (!assignedInstances.isEmpty()) {
        // Segment is already assigned: keep the instances it has.
        return new ArrayList<String>(latestIdealState.getInstanceSet(segmentName));
      }
      final String serverTenant =
          ControllerTenantNameBuilder.getOfflineTenantNameForTenant(offlineTableConfig.getTenantConfig().getServer());
      final int replicas = Integer.parseInt(offlineTableConfig.getValidationConfig().getReplication());
      return segmentAssignmentStrategy.getAssignedInstances(_helixAdmin, _helixClusterName, segmentMetadata, replicas, serverTenant);
    }
  };
  HelixHelper.addSegmentToIdealState(_helixZkManager, offlineTableName, segmentName, getInstancesForSegment);
}
Aggregations