Search in sources :

Example 11 with ZNRecord

use of org.apache.helix.ZNRecord in project pinot by linkedin.

the class SegmentStatusChecker method runSegmentMetrics.

/**
   * Runs a segment status pass over the currently loaded tables.
   *
   * <p>When this controller is not the leader the pass is skipped: the status is reset through
   * {@code setStatusToDefault()}, {@code stop()} is called and the method returns.
   *
   * <p>Otherwise, for every table name returned by the resource manager, the ideal state is compared with the
   * external view and these table gauges are published: {@code IDEALSTATE_ZNODE_SIZE},
   * {@code NUMBER_OF_REPLICAS}, {@code PERCENT_OF_REPLICAS}, {@code SEGMENTS_IN_ERROR_STATE} and
   * {@code PERCENT_SEGMENTS_AVAILABLE}. Tables with no ideal state or no partitions are reported as fully
   * available. The global {@code REALTIME_TABLE_COUNT} and {@code OFFLINE_TABLE_COUNT} gauges are published once
   * all tables have been visited.
   */
public void runSegmentMetrics() {
    if (!_pinotHelixResourceManager.isLeader()) {
        LOGGER.info("Skipping Segment Status check, not leader!");
        setStatusToDefault();
        stop();
        return;
    }
    long startTime = System.nanoTime();
    LOGGER.info("Starting Segment Status check for metrics");
    // Fetch the list of tables
    List<String> allTableNames = _pinotHelixResourceManager.getAllPinotTableNames();
    String helixClusterName = _pinotHelixResourceManager.getHelixClusterName();
    HelixAdmin helixAdmin = _pinotHelixResourceManager.getHelixAdmin();
    int realTimeTableCount = 0;
    int offlineTableCount = 0;
    ZkHelixPropertyStore<ZNRecord> propertyStore = _pinotHelixResourceManager.getPropertyStore();
    for (String tableName : allTableNames) {
        // Resolve the table type once; it drives both the table counts and the push-in-progress check below.
        TableType tableType = TableNameBuilder.getTableTypeFromTableName(tableName);
        // Constant-first equals: a table whose type resolves to null must not abort the whole pass with a
        // NullPointerException. Such a table is counted together with the non-OFFLINE tables.
        boolean isOfflineTable = TableType.OFFLINE.equals(tableType);
        if (isOfflineTable) {
            offlineTableCount++;
        } else {
            realTimeTableCount++;
        }
        IdealState idealState = helixAdmin.getResourceIdealState(helixClusterName, tableName);
        if ((idealState == null) || (idealState.getPartitionSet().isEmpty())) {
            // Nothing to compare against: report the table as fully replicated and fully available.
            _metricsRegistry.setValueOfTableGauge(tableName, ControllerGauge.NUMBER_OF_REPLICAS, 1);
            _metricsRegistry.setValueOfTableGauge(tableName, ControllerGauge.PERCENT_OF_REPLICAS, 100);
            _metricsRegistry.setValueOfTableGauge(tableName, ControllerGauge.PERCENT_SEGMENTS_AVAILABLE, 100);
            continue;
        }
        _metricsRegistry.setValueOfTableGauge(tableName, ControllerGauge.IDEALSTATE_ZNODE_SIZE, idealState.toString().length());
        ExternalView externalView = helixAdmin.getResourceExternalView(helixClusterName, tableName);
        // Keeps track of maximum number of replicas in ideal state
        int nReplicasIdealMax = 0;
        // Keeps track of minimum number of replicas in external view (-1 means "no segment evaluated yet")
        int nReplicasExternal = -1;
        // Keeps track of number of segments in error state
        int nErrors = 0;
        // Keeps track of number of segments with no online replicas
        int nOffline = 0;
        // Counts number of segments
        int nSegments = 0;
        for (String partitionName : idealState.getPartitionSet()) {
            int nReplicas = 0;
            nSegments++;
            Map<String, String> idealInstanceStates = idealState.getInstanceStateMap(partitionName);
            // Skip segments not online in ideal state
            if (!idealInstanceStates.containsValue(ONLINE)) {
                continue;
            }
            nReplicasIdealMax = Math.max(nReplicasIdealMax, idealInstanceStates.size());
            Map<String, String> externalInstanceStates = (externalView == null) ? null : externalView.getStateMap(partitionName);
            if (externalInstanceStates == null) {
                // No replicas for this segment
                if (isOfflineTable) {
                    OfflineSegmentZKMetadata segmentZKMetadata = ZKMetadataProvider.getOfflineSegmentZKMetadata(propertyStore, tableName, partitionName);
                    // 1000L forces long arithmetic so a large wait interval cannot overflow before the subtraction.
                    if (segmentZKMetadata != null && segmentZKMetadata.getPushTime() > System.currentTimeMillis() - _waitForPushTimeSeconds * 1000L) {
                        // push not yet finished, skip
                        continue;
                    }
                }
                // Check the cap before counting, same as the "no online replicas" branch below.
                if (nOffline < MaxOfflineSegmentsToLog) {
                    LOGGER.warn("Segment {} of table {} has no replicas", partitionName, tableName);
                }
                nOffline++;
                nReplicasExternal = 0;
                continue;
            }
            for (String externalState : externalInstanceStates.values()) {
                // Count number of online replicas
                if (externalState.equals(ONLINE)) {
                    nReplicas++;
                }
                if (externalState.equals(ERROR)) {
                    nErrors++;
                }
            }
            if (nReplicas == 0) {
                if (nOffline < MaxOfflineSegmentsToLog) {
                    LOGGER.warn("Segment {} of table {} has no online replicas", partitionName, tableName);
                }
                nOffline++;
            }
            // Track the minimum replica count seen so far across evaluated segments.
            nReplicasExternal = ((nReplicasExternal > nReplicas) || (nReplicasExternal == -1)) ? nReplicas : nReplicasExternal;
        }
        if (nReplicasExternal == -1) {
            // No segment was evaluated against the external view.
            nReplicasExternal = (nReplicasIdealMax == 0) ? 1 : 0;
        }
        // Synchronization provided by Controller Gauge to make sure that only one thread updates the gauge
        _metricsRegistry.setValueOfTableGauge(tableName, ControllerGauge.NUMBER_OF_REPLICAS, nReplicasExternal);
        _metricsRegistry.setValueOfTableGauge(tableName, ControllerGauge.PERCENT_OF_REPLICAS, (nReplicasIdealMax > 0) ? (nReplicasExternal * 100 / nReplicasIdealMax) : 100);
        _metricsRegistry.setValueOfTableGauge(tableName, ControllerGauge.SEGMENTS_IN_ERROR_STATE, nErrors);
        _metricsRegistry.setValueOfTableGauge(tableName, ControllerGauge.PERCENT_SEGMENTS_AVAILABLE, (nSegments > 0) ? (100 - (nOffline * 100 / nSegments)) : 100);
        if (nOffline > 0) {
            LOGGER.warn("Table {} has {} segments with no online replicas", tableName, nOffline);
        }
        if (nReplicasExternal < nReplicasIdealMax) {
            LOGGER.warn("Table {} has {} replicas, below replication threshold :{}", tableName, nReplicasExternal, nReplicasIdealMax);
        }
    }
    _metricsRegistry.setValueOfGlobalGauge(ControllerGauge.REALTIME_TABLE_COUNT, realTimeTableCount);
    _metricsRegistry.setValueOfGlobalGauge(ControllerGauge.OFFLINE_TABLE_COUNT, offlineTableCount);
    long totalNanos = System.nanoTime() - startTime;
    LOGGER.info("Segment status metrics completed in {}ms", TimeUnit.MILLISECONDS.convert(totalNanos, TimeUnit.NANOSECONDS));
}
Also used : ExternalView(org.apache.helix.model.ExternalView) TableType(com.linkedin.pinot.common.utils.CommonConstants.Helix.TableType) OfflineSegmentZKMetadata(com.linkedin.pinot.common.metadata.segment.OfflineSegmentZKMetadata) HelixAdmin(org.apache.helix.HelixAdmin) IdealState(org.apache.helix.model.IdealState) ZNRecord(org.apache.helix.ZNRecord)

Example 12 with ZNRecord

use of org.apache.helix.ZNRecord in project pinot by linkedin.

the class PinotHelixResourceManager method getSchema.

/**
   * Looks up a schema by name in the property store.
   *
   * @param schemaName name under which the schema record is stored
   * @return the schema built from the stored record, or {@code null} when no record exists for the name
   * @throws JsonParseException declared by this method; presumably raised while converting the stored record
   * @throws JsonMappingException declared by this method; presumably raised while converting the stored record
   * @throws IOException declared by this method; presumably raised while converting the stored record
   */
@Nullable
public Schema getSchema(String schemaName) throws JsonParseException, JsonMappingException, IOException {
    ZNRecord schemaRecord = PinotHelixPropertyStoreZnRecordProvider.forSchema(_propertyStore).get(schemaName);
    if (schemaRecord == null) {
        // Nothing stored under this name.
        return null;
    }
    return SchemaUtils.fromZNRecord(schemaRecord);
}
Also used : PinotHelixPropertyStoreZnRecordProvider(com.linkedin.pinot.common.utils.helix.PinotHelixPropertyStoreZnRecordProvider) ZNRecord(org.apache.helix.ZNRecord) Nullable(javax.annotation.Nullable)

Example 13 with ZNRecord

use of org.apache.helix.ZNRecord in project pinot by linkedin.

the class PinotSegmentRestletResource method getSegmentMetaData.

/**
   * Get meta-data for segment of table. Table name is the suffixed (offline/realtime)
   * name.
   *
   * <p>When the segment does not exist in the property store, the response status is set to
   * {@code CLIENT_ERROR_BAD_REQUEST} and the error text is returned instead of JSON.
   *
   * @param tableName Suffixed (realtime/offline) table name
   * @param segmentName Segment for which to get the meta-data
   * @param tableType Table type; selects whether the offline or the realtime segment meta-data is read
   * @return A JSON array containing one object with the table name and, for OFFLINE or REALTIME table
   *         types, the segment meta-data map; or the error text when the segment is not found
   * @throws JSONException declared for the JSON building calls
   */
private StringRepresentation getSegmentMetaData(String tableName, String segmentName, TableType tableType) throws JSONException {
    // Fetch the store once; it is needed for the existence check and for the meta-data lookup.
    ZkHelixPropertyStore<ZNRecord> propertyStore = _pinotHelixResourceManager.getPropertyStore();
    if (!ZKMetadataProvider.isSegmentExisted(propertyStore, tableName, segmentName)) {
        String error = "Error: segment " + segmentName + " not found.";
        LOGGER.error(error);
        setStatus(Status.CLIENT_ERROR_BAD_REQUEST);
        return new StringRepresentation(error);
    }
    JSONArray ret = new JSONArray();
    JSONObject jsonObj = new JSONObject();
    jsonObj.put(TABLE_NAME, tableName);
    // Compare against the enum constant through the type, not through the (possibly null) parameter.
    if (tableType == TableType.OFFLINE) {
        OfflineSegmentZKMetadata offlineSegmentZKMetadata = ZKMetadataProvider.getOfflineSegmentZKMetadata(propertyStore, tableName, segmentName);
        jsonObj.put(STATE, offlineSegmentZKMetadata.toMap());
    }
    if (tableType == TableType.REALTIME) {
        RealtimeSegmentZKMetadata realtimeSegmentZKMetadata = ZKMetadataProvider.getRealtimeSegmentZKMetadata(propertyStore, tableName, segmentName);
        jsonObj.put(STATE, realtimeSegmentZKMetadata.toMap());
    }
    ret.put(jsonObj);
    return new StringRepresentation(ret.toString());
}
Also used : RealtimeSegmentZKMetadata(com.linkedin.pinot.common.metadata.segment.RealtimeSegmentZKMetadata) JSONObject(org.json.JSONObject) StringRepresentation(org.restlet.representation.StringRepresentation) OfflineSegmentZKMetadata(com.linkedin.pinot.common.metadata.segment.OfflineSegmentZKMetadata) JSONArray(org.json.JSONArray) ZNRecord(org.apache.helix.ZNRecord)

Example 14 with ZNRecord

use of org.apache.helix.ZNRecord in project pinot by linkedin.

the class PinotHelixResourceManager method addTable.

/**
   * Adds a table described by the given config.
   *
   * <p>On a single-tenant cluster the tenant config is overwritten with the default broker tenant and the
   * default offline/realtime server tenant; otherwise the config must already carry both a broker and a
   * server tenant. OFFLINE tables get an empty ideal state added as a Helix resource before their config is
   * written to the property store; REALTIME tables have their config written first and are then handed to
   * {@code ensureRealtimeClusterIsSetUp}. The broker resource is handled last in both cases.
   *
   * @param config table config; its table type is matched against {@code TableType} ignoring case
   * @throws RuntimeException when the tenant config is missing or incomplete on a multi-tenant cluster,
   *         or when the table type is not supported
   * @throws JsonGenerationException declared by this method; presumably from serializing the config
   * @throws JsonMappingException declared by this method; presumably from serializing the config
   * @throws IOException declared by this method; presumably from serializing the config
   */
public void addTable(AbstractTableConfig config) throws JsonGenerationException, JsonMappingException, IOException {
    TenantConfig tenantConfig;
    // Locale.ROOT keeps the upper-casing independent of the JVM default locale (e.g. Turkish dotted I),
    // which would otherwise make valueOf fail for a perfectly valid table type.
    TableType type = TableType.valueOf(config.getTableType().toUpperCase(java.util.Locale.ROOT));
    if (isSingleTenantCluster()) {
        tenantConfig = new TenantConfig();
        tenantConfig.setBroker(ControllerTenantNameBuilder.getBrokerTenantNameForTenant(ControllerTenantNameBuilder.DEFAULT_TENANT_NAME));
        switch(type) {
            case OFFLINE:
                tenantConfig.setServer(ControllerTenantNameBuilder.getOfflineTenantNameForTenant(ControllerTenantNameBuilder.DEFAULT_TENANT_NAME));
                break;
            case REALTIME:
                tenantConfig.setServer(ControllerTenantNameBuilder.getRealtimeTenantNameForTenant(ControllerTenantNameBuilder.DEFAULT_TENANT_NAME));
                break;
            default:
                throw new RuntimeException("UnSupported table type");
        }
        config.setTenantConfig(tenantConfig);
    } else {
        tenantConfig = config.getTenantConfig();
        // A config without any tenant section is reported the same way as one with missing entries.
        if (tenantConfig == null || tenantConfig.getBroker() == null || tenantConfig.getServer() == null) {
            throw new RuntimeException("missing tenant configs");
        }
    }
    SegmentsValidationAndRetentionConfig segmentsConfig = config.getValidationConfig();
    switch(type) {
        case OFFLINE:
            final String offlineTableName = config.getTableName();
            // now lets build an ideal state
            LOGGER.info("building empty ideal state for table : {}", offlineTableName);
            final IdealState offlineIdealState = PinotTableIdealStateBuilder.buildEmptyIdealStateFor(offlineTableName, Integer.parseInt(segmentsConfig.getReplication()));
            LOGGER.info("adding table via the admin");
            _helixAdmin.addResource(_helixClusterName, offlineTableName, offlineIdealState);
            LOGGER.info("successfully added the table : {} to the cluster", offlineTableName);
            // lets add table configs
            ZKMetadataProvider.setOfflineTableConfig(_propertyStore, offlineTableName, AbstractTableConfig.toZnRecord(config));
            _propertyStore.create(ZKMetadataProvider.constructPropertyStorePathForResource(offlineTableName), new ZNRecord(offlineTableName), AccessOption.PERSISTENT);
            break;
        case REALTIME:
            final String realtimeTableName = config.getTableName();
            // lets add table configs
            ZKMetadataProvider.setRealtimeTableConfig(_propertyStore, realtimeTableName, AbstractTableConfig.toZnRecord(config));
            /*
         * PinotRealtimeSegmentManager sets up watches on table and segment path. When a table gets created,
         * it expects the INSTANCE path in propertystore to be set up so that it can get the kafka group ID and
         * create (high-level consumer) segments for that table.
         * So, we need to set up the instance first, before adding the table resource for HLC new table creation.
         *
         * For low-level consumers, the order is to create the resource first, and set up the propertystore with segments
         * and then tweak the idealstate to add those segments.
         *
         * We also need to support the case when a high-level consumer already exists for a table and we are adding
         * the low-level consumers.
         */
            IndexingConfig indexingConfig = config.getIndexingConfig();
            ensureRealtimeClusterIsSetUp(config, realtimeTableName, indexingConfig);
            LOGGER.info("Successfully added or updated the table {} ", realtimeTableName);
            break;
        default:
            throw new RuntimeException("UnSupported table type");
    }
    handleBrokerResource(config);
}
Also used : IndexingConfig(com.linkedin.pinot.common.config.IndexingConfig) TableType(com.linkedin.pinot.common.utils.CommonConstants.Helix.TableType) TenantConfig(com.linkedin.pinot.common.config.TenantConfig) SegmentsValidationAndRetentionConfig(com.linkedin.pinot.common.config.SegmentsValidationAndRetentionConfig) IdealState(org.apache.helix.model.IdealState) ZNRecord(org.apache.helix.ZNRecord)

Example 15 with ZNRecord

use of org.apache.helix.ZNRecord in project pinot by linkedin.

the class HelixSetupUtils method createHelixClusterIfNeeded.

/**
 * Creates the Helix cluster with the Pinot state models if it does not exist yet.
 *
 * <p>When the cluster already exists nothing is created. In that case, if {@code isUpdateStateModel} is set
 * and the segment state model does not yet list the CONSUMING state, the state model definition is replaced
 * with a freshly generated one.
 *
 * <p>For a new cluster this enables participant auto-join, limits state-transition threads to one, registers
 * the segment and broker-resource state models, sets an empty ideal state for the broker resource and
 * initializes the property store path.
 *
 * @param helixClusterName name of the Helix cluster to create or update
 * @param zkPath ZooKeeper address passed to the Helix admin and ZooKeeper client
 * @param isUpdateStateModel whether an existing cluster's segment state model should be updated when it
 *        lacks the CONSUMING state
 */
public static void createHelixClusterIfNeeded(String helixClusterName, String zkPath, boolean isUpdateStateModel) {
    final HelixAdmin admin = new ZKHelixAdmin(zkPath);
    final String segmentStateModelName = PinotHelixSegmentOnlineOfflineStateModelGenerator.PINOT_SEGMENT_ONLINE_OFFLINE_STATE_MODEL;
    if (admin.getClusters().contains(helixClusterName)) {
        LOGGER.info("cluster already exists ********************************************* ");
        if (isUpdateStateModel) {
            final StateModelDefinition curStateModelDef = admin.getStateModelDef(helixClusterName, segmentStateModelName);
            List<String> states = curStateModelDef.getStatesPriorityList();
            if (states.contains(PinotHelixSegmentOnlineOfflineStateModelGenerator.CONSUMING_STATE)) {
                LOGGER.info("State model {} already updated to contain CONSUMING state", segmentStateModelName);
                return;
            }
            LOGGER.info("Updating {} to add states for low level kafka consumers", segmentStateModelName);
            StateModelDefinition newStateModelDef = PinotHelixSegmentOnlineOfflineStateModelGenerator.generatePinotStateModelDefinition();
            ZkClient zkClient = new ZkClient(zkPath);
            try {
                zkClient.waitUntilConnected(20, TimeUnit.SECONDS);
                zkClient.setZkSerializer(new ZNRecordSerializer());
                HelixDataAccessor accessor = new ZKHelixDataAccessor(helixClusterName, new ZkBaseDataAccessor<ZNRecord>(zkClient));
                PropertyKey.Builder keyBuilder = accessor.keyBuilder();
                accessor.setProperty(keyBuilder.stateModelDef(segmentStateModelName), newStateModelDef);
                LOGGER.info("Completed updating statemodel {}", segmentStateModelName);
            } finally {
                // Release the ZooKeeper connection even when connecting or writing the state model fails.
                zkClient.close();
            }
        }
        return;
    }
    LOGGER.info("Creating a new cluster, as the helix cluster : " + helixClusterName + " was not found ********************************************* ");
    admin.addCluster(helixClusterName, false);
    LOGGER.info("Enable auto join.");
    final HelixConfigScope scope = new HelixConfigScopeBuilder(ConfigScopeProperty.CLUSTER).forCluster(helixClusterName).build();
    final Map<String, String> props = new HashMap<String, String>();
    props.put(ZKHelixManager.ALLOW_PARTICIPANT_AUTO_JOIN, String.valueOf(true));
    //we need only one segment to be loaded at a time
    props.put(MessageType.STATE_TRANSITION + "." + HelixTaskExecutor.MAX_THREADS, String.valueOf(1));
    admin.setConfig(scope, props);
    LOGGER.info("Adding state model {} (with CONSUMED state) generated using {} **********************************************", segmentStateModelName, PinotHelixSegmentOnlineOfflineStateModelGenerator.class.toString());
    // If this is a fresh cluster we are creating, then the cluster will see the CONSUMING state in the
    // state model. But then the servers will never be asked to go to that STATE (whether they have the code
    // to handle it or not) until we complete the feature using low-level kafka consumers and turn the feature on.
    admin.addStateModelDef(helixClusterName, segmentStateModelName, PinotHelixSegmentOnlineOfflineStateModelGenerator.generatePinotStateModelDefinition());
    LOGGER.info("Adding state model definition named : " + PinotHelixBrokerResourceOnlineOfflineStateModelGenerator.PINOT_BROKER_RESOURCE_ONLINE_OFFLINE_STATE_MODEL + " generated using : " + PinotHelixBrokerResourceOnlineOfflineStateModelGenerator.class.toString() + " ********************************************** ");
    admin.addStateModelDef(helixClusterName, PinotHelixBrokerResourceOnlineOfflineStateModelGenerator.PINOT_BROKER_RESOURCE_ONLINE_OFFLINE_STATE_MODEL, PinotHelixBrokerResourceOnlineOfflineStateModelGenerator.generatePinotStateModelDefinition());
    LOGGER.info("Adding empty ideal state for Broker!");
    HelixHelper.updateResourceConfigsFor(new HashMap<String, String>(), CommonConstants.Helix.BROKER_RESOURCE_INSTANCE, helixClusterName, admin);
    IdealState idealState = PinotTableIdealStateBuilder.buildEmptyIdealStateForBrokerResource(admin, helixClusterName);
    admin.setResourceIdealState(helixClusterName, CommonConstants.Helix.BROKER_RESOURCE_INSTANCE, idealState);
    initPropertyStorePath(helixClusterName, zkPath);
    LOGGER.info("New Cluster setup completed... ********************************************** ");
}
Also used : ZkClient(org.apache.helix.manager.zk.ZkClient) HashMap(java.util.HashMap) HelixConfigScopeBuilder(org.apache.helix.model.builder.HelixConfigScopeBuilder) HelixAdmin(org.apache.helix.HelixAdmin) ZKHelixAdmin(org.apache.helix.manager.zk.ZKHelixAdmin) IdealState(org.apache.helix.model.IdealState) ZKHelixDataAccessor(org.apache.helix.manager.zk.ZKHelixDataAccessor) HelixDataAccessor(org.apache.helix.HelixDataAccessor) ZKHelixAdmin(org.apache.helix.manager.zk.ZKHelixAdmin) StateModelDefinition(org.apache.helix.model.StateModelDefinition) PinotHelixSegmentOnlineOfflineStateModelGenerator(com.linkedin.pinot.controller.helix.core.PinotHelixSegmentOnlineOfflineStateModelGenerator) HelixConfigScope(org.apache.helix.model.HelixConfigScope) PinotHelixBrokerResourceOnlineOfflineStateModelGenerator(com.linkedin.pinot.controller.helix.core.PinotHelixBrokerResourceOnlineOfflineStateModelGenerator) ZNRecord(org.apache.helix.ZNRecord) PropertyKey(org.apache.helix.PropertyKey) ZNRecordSerializer(org.apache.helix.manager.zk.ZNRecordSerializer) ZKHelixDataAccessor(org.apache.helix.manager.zk.ZKHelixDataAccessor)

Aggregations

ZNRecord (org.apache.helix.ZNRecord)80 ArrayList (java.util.ArrayList)23 IdealState (org.apache.helix.model.IdealState)15 Test (org.testng.annotations.Test)13 LLCRealtimeSegmentZKMetadata (com.linkedin.pinot.common.metadata.segment.LLCRealtimeSegmentZKMetadata)12 HashMap (java.util.HashMap)12 LLCSegmentName (com.linkedin.pinot.common.utils.LLCSegmentName)11 AbstractTableConfig (com.linkedin.pinot.common.config.AbstractTableConfig)10 RealtimeSegmentZKMetadata (com.linkedin.pinot.common.metadata.segment.RealtimeSegmentZKMetadata)8 HelixAdmin (org.apache.helix.HelixAdmin)8 ZNRecordSerializer (org.apache.helix.manager.zk.ZNRecordSerializer)8 HashSet (java.util.HashSet)6 List (java.util.List)6 ExternalView (org.apache.helix.model.ExternalView)6 OfflineSegmentZKMetadata (com.linkedin.pinot.common.metadata.segment.OfflineSegmentZKMetadata)5 KafkaStreamMetadata (com.linkedin.pinot.common.metadata.stream.KafkaStreamMetadata)5 BeforeTest (org.testng.annotations.BeforeTest)5 TableType (com.linkedin.pinot.common.utils.CommonConstants.Helix.TableType)4 PinotHelixPropertyStoreZnRecordProvider (com.linkedin.pinot.common.utils.helix.PinotHelixPropertyStoreZnRecordProvider)4 ZkClient (org.apache.helix.manager.zk.ZkClient)4