Example 26 with ZNRecord

use of org.apache.helix.zookeeper.datamodel.ZNRecord in project ambry by linkedin.

the class HelixClusterManager method initializeHelixManagerAndPropertyStoreInLocalDC.

/**
 * Initialize HelixManager in local datacenter and complete subscription of HelixPropertyStore to listen for
 * PartitionOverride zNode. This needs to happen before other datacenters are initialized so that any partition
 * overrides can be properly honored.
 * @param dataCenterToZkAddress the map mapping each datacenter to its corresponding ZkAddress.
 * @param instanceName the String representation of the instance associated with this manager.
 * @param helixFactory the factory class to construct and get a reference to a {@link HelixManager}.
 * @return the HelixManager of the local datacenter, or {@code null} if the local datacenter is
 *         {@link ReplicaType#CLOUD_BACKED}, as we currently do not support getting cluster state from Helix for cloud
 *         datacenters.
 * @throws Exception if connecting the {@link HelixManager} to ZooKeeper fails.
 */
private HelixManager initializeHelixManagerAndPropertyStoreInLocalDC(Map<String, DcZkInfo> dataCenterToZkAddress, String instanceName, HelixFactory helixFactory) throws Exception {
    DcZkInfo dcZkInfo = dataCenterToZkAddress.get(clusterMapConfig.clusterMapDatacenterName);
    if (dcZkInfo.getReplicaType() == ReplicaType.CLOUD_BACKED) {
        return null;
    }
    // For now, the first ZK endpoint (if there is more than one) is adopted by default. Note that Ambry doesn't
    // support multiple HelixClusterManagers (spectators) on the same node.
    String zkConnectStr = dcZkInfo.getZkConnectStrs().get(0);
    HelixManager manager = helixFactory.getZkHelixManagerAndConnect(clusterName, instanceName, InstanceType.SPECTATOR, zkConnectStr);
    helixPropertyStoreInLocalDc = manager.getHelixPropertyStore();
    logger.info("HelixPropertyStore from local datacenter {} is: {}", dcZkInfo.getDcName(), helixPropertyStoreInLocalDc);
    IZkDataListener dataListener = new IZkDataListener() {

        @Override
        public void handleDataChange(String dataPath, Object data) {
            logger.info("Received data change notification for: {}", dataPath);
        }

        @Override
        public void handleDataDeleted(String dataPath) {
            logger.info("Received data delete notification for: {}", dataPath);
        }
    };
    logger.info("Subscribing data listener to HelixPropertyStore.");
    helixPropertyStoreInLocalDc.subscribeDataChanges(PARTITION_OVERRIDE_ZNODE_PATH, dataListener);
    logger.info("Getting PartitionOverride ZNRecord from HelixPropertyStore");
    ZNRecord zNRecord = helixPropertyStoreInLocalDc.get(PARTITION_OVERRIDE_ZNODE_PATH, null, AccessOption.PERSISTENT);
    if (clusterMapConfig.clusterMapEnablePartitionOverride) {
        if (zNRecord != null) {
            partitionOverrideInfoMap.putAll(zNRecord.getMapFields());
            logger.info("partitionOverrideInfoMap is initialized!");
        } else {
            logger.warn("ZNRecord from HelixPropertyStore is NULL, the partitionOverrideInfoMap is empty.");
        }
    }
    return manager;
}
Also used : HelixManager(org.apache.helix.HelixManager) IZkDataListener(org.apache.helix.zookeeper.zkclient.IZkDataListener) JSONObject(org.json.JSONObject) ZNRecord(org.apache.helix.zookeeper.datamodel.ZNRecord)
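
A hedged aside: the listener above only logs change notifications; the actual override payload is fetched with get(). The sketch below shows the writing side that such a listener would observe. The znode path constant and the "state"/"RO" field names are assumptions for illustration, not Ambry's real constants.

import java.util.Collections;
import org.apache.helix.AccessOption;
import org.apache.helix.store.HelixPropertyStore;
import org.apache.helix.zookeeper.datamodel.ZNRecord;

public class PartitionOverrideWriterSketch {

    // Hypothetical path; Ambry defines the real constant elsewhere.
    private static final String PARTITION_OVERRIDE_ZNODE_PATH = "/ClusterConfigs/partitionOverride";

    static void markPartitionReadOnly(HelixPropertyStore<ZNRecord> store, String partitionId) {
        ZNRecord record = new ZNRecord("PartitionOverride");
        // mapFields: partition id -> {"state": "RO"}; the inner keys are illustrative.
        record.setMapField(partitionId, Collections.singletonMap("state", "RO"));
        if (!store.set(PARTITION_OVERRIDE_ZNODE_PATH, record, AccessOption.PERSISTENT)) {
            throw new IllegalStateException("Failed to write partition override for " + partitionId);
        }
    }
}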

Example 27 with ZNRecord

use of org.apache.helix.zookeeper.datamodel.ZNRecord in project ambry by linkedin.

the class HelixClusterManager method getBootstrapReplica.

/**
 * {@inheritDoc}
 * To create a bootstrap replica, {@link HelixClusterManager} needs to fetch replica info (i.e. capacity, mount path)
 * from the Helix PropertyStore. This method looks up the ZNode in the local datacenter and does some validation. Right
 * now, {@link HelixClusterManager} supports getting a bootstrap replica of a new partition, but it doesn't support
 * getting replicas residing on hosts that are not present in the clustermap.
 * The ZNRecord of REPLICA_ADDITION_ZNODE has following format in mapFields.
 * <pre>
 * "mapFields": {
 *     "1": {
 *         "replicaCapacityInBytes": 107374182400,
 *         "partitionClass": "max-replicas-all-datacenters",
 *         "localhost1_17088": "/tmp/c/1",
 *         "localhost2_17088": "/tmp/d/1"
 *     },
 *     "2": {
 *         "replicaCapacityInBytes": 107374182400,
 *         "partitionClass": "max-replicas-all-datacenters",
 *         "localhost3_17088": "/tmp/e/1"
 *     }
 * }
 * </pre>
 * In the above example, two bootstrap replicas of partition "1" will be added to localhost1 and localhost2,
 * respectively. Each host name maps to the mount path on which its bootstrap replica should be placed.
 */
@Override
public ReplicaId getBootstrapReplica(String partitionIdStr, DataNodeId dataNodeId) {
    ReplicaId bootstrapReplica = null;
    logger.info("Getting ReplicaAddition ZNRecord from HelixPropertyStore in local DC.");
    ZNRecord zNRecord = helixPropertyStoreInLocalDc.get(REPLICA_ADDITION_ZNODE_PATH, null, AccessOption.PERSISTENT);
    if (zNRecord == null) {
        logger.warn("ZNRecord from HelixPropertyStore is NULL, partition to replicaInfo map doesn't exist.");
        return null;
    }
    String instanceName = getInstanceName(dataNodeId.getHostname(), dataNodeId.getPort());
    Map<String, Map<String, String>> partitionToReplicas = zNRecord.getMapFields();
    Map<String, String> replicaInfos = partitionToReplicas.get(partitionIdStr);
    if (replicaInfos == null || !replicaInfos.containsKey(instanceName)) {
        logger.warn("Partition {} or replica on host {} is not found in replica info map", partitionIdStr, instanceName);
        return null;
    }
    long replicaCapacity = Long.parseLong(replicaInfos.get(REPLICAS_CAPACITY_STR));
    String partitionClass = replicaInfos.get(PARTITION_CLASS_STR);
    AmbryPartition mappedPartition = new AmbryPartition(Long.parseLong(partitionIdStr), partitionClass, helixClusterManagerCallback);
    AmbryPartition currentPartition = partitionNameToAmbryPartition.putIfAbsent(mappedPartition.toPathString(), mappedPartition);
    if (currentPartition == null) {
        logger.info("Partition {} is currently not present in cluster map, a new partition is created", partitionIdStr);
        currentPartition = mappedPartition;
    }
    // Check whether the data node and its disk are present in the current cluster map; if not, bootstrapReplica stays null.
    ClusterChangeHandler localClusterChangeHandler = dcToDcInfo.get(clusterMapConfig.clusterMapDatacenterName).clusterChangeHandler;
    AmbryDataNode dataNode = localClusterChangeHandler.getDataNode(instanceName);
    String mountPathAndDiskCapacityFromHelix = replicaInfos.get(instanceName);
    String[] segments = mountPathAndDiskCapacityFromHelix.split(DISK_CAPACITY_DELIM_STR);
    String mountPath = segments[0];
    String diskCapacityStr = segments.length >= 2 ? segments[1] : null;
    Set<AmbryDisk> disks = dataNode != null ? localClusterChangeHandler.getDisks(dataNode) : null;
    Optional<AmbryDisk> potentialDisk = disks != null ? disks.stream().filter(d -> d.getMountPath().equals(mountPath)).findAny() : Optional.empty();
    if (potentialDisk.isPresent()) {
        try {
            AmbryDisk targetDisk = potentialDisk.get();
            if (diskCapacityStr != null) {
                // update disk capacity if bootstrap replica info contains disk capacity in bytes.
                targetDisk.setDiskCapacityInBytes(Long.parseLong(diskCapacityStr));
            }
            bootstrapReplica = new AmbryServerReplica(clusterMapConfig, currentPartition, targetDisk, true, replicaCapacity, false);
        } catch (Exception e) {
            logger.error("Failed to create bootstrap replica for partition {} on {} due to exception: ", partitionIdStr, instanceName, e);
            bootstrapReplica = null;
        }
    } else {
        logger.error("Either datanode or disk that associated with bootstrap replica is not found in cluster map. Cannot create the replica.");
    }
    // If the bootstrap replica is on the current node, temporarily track it in the bootstrapReplicas map; the cluster
    // change handler later moves it from this map to the clustermap-related data structures that other components query.
    if (bootstrapReplica != null && instanceName.equals(selfInstanceName)) {
        // Note that this method might be called by several state transition threads concurrently.
        bootstrapReplicas.put(currentPartition.toPathString(), bootstrapReplica);
    }
    return bootstrapReplica;
}
Also used : IOException(java.io.IOException) HashMap(java.util.HashMap) Map(java.util.Map) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) ZNRecord(org.apache.helix.zookeeper.datamodel.ZNRecord)
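
Since the javadoc above fully specifies the mapFields layout of the REPLICA_ADDITION ZNode, a minimal sketch of constructing such a record follows. Only the field keys come from the documented format; the record id "ReplicaAddition" and the class name are assumptions.

import java.util.HashMap;
import java.util.Map;
import org.apache.helix.zookeeper.datamodel.ZNRecord;

public class ReplicaAdditionRecordSketch {

    static ZNRecord buildReplicaAdditionRecord() {
        // Record id is an assumption; Helix only requires it to be non-null.
        ZNRecord record = new ZNRecord("ReplicaAddition");
        Map<String, String> replicaInfo = new HashMap<>();
        replicaInfo.put("replicaCapacityInBytes", "107374182400");
        replicaInfo.put("partitionClass", "max-replicas-all-datacenters");
        // instanceName -> mount path (optionally followed by a delimited disk capacity).
        replicaInfo.put("localhost1_17088", "/tmp/c/1");
        replicaInfo.put("localhost2_17088", "/tmp/d/1");
        // The mapFields key is the partition id, matching the format in the javadoc above.
        record.setMapField("1", replicaInfo);
        return record;
    }
}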

Example 28 with ZNRecord

use of org.apache.helix.zookeeper.datamodel.ZNRecord in project ambry by linkedin.

the class HelixBootstrapUpgradeUtil method uploadClusterAdminInfos.

/**
 * Uploads cluster config infos onto Helix PropertyStore.
 * @param adminInfosByDc the cluster admin information (overridden partitions, added replicas), grouped by DC, that
 *                       will be applied to the cluster.
 * @param clusterAdminType the type of cluster admin config to upload (i.e. PartitionOverride or ReplicaAddition).
 * @param adminConfigZNodePath ZNode path of admin config associated with clusterAdminType.
 */
private void uploadClusterAdminInfos(Map<String, Map<String, Map<String, String>>> adminInfosByDc, String clusterAdminType, String adminConfigZNodePath) {
    for (String dcName : dataCenterToZkAddress.keySet()) {
        info("Uploading {} infos for datacenter {}.", clusterAdminType, dcName);
        HelixPropertyStore<ZNRecord> helixPropertyStore = createHelixPropertyStore(dcName);
        try {
            ZNRecord znRecord = new ZNRecord(clusterAdminType);
            znRecord.setMapFields(adminInfosByDc.get(dcName));
            if (!helixPropertyStore.set(adminConfigZNodePath, znRecord, AccessOption.PERSISTENT)) {
                logger.error("Failed to upload {} infos for datacenter {}", clusterAdminType, dcName);
            }
        } finally {
            helixPropertyStore.stop();
        }
    }
}
Also used : ZNRecord(org.apache.helix.zookeeper.datamodel.ZNRecord)
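
For completeness, a hedged sketch of the read side: fetching the uploaded admin config back from the property store, e.g. to verify the write. Class and method names here are illustrative, not Ambry's.

import java.util.Collections;
import java.util.Map;
import org.apache.helix.AccessOption;
import org.apache.helix.store.HelixPropertyStore;
import org.apache.helix.zookeeper.datamodel.ZNRecord;
import org.apache.zookeeper.data.Stat;

public class AdminConfigReaderSketch {

    static Map<String, Map<String, String>> readAdminConfig(HelixPropertyStore<ZNRecord> store,
            String adminConfigZNodePath) {
        Stat stat = new Stat();
        ZNRecord record = store.get(adminConfigZNodePath, stat, AccessOption.PERSISTENT);
        // A null record means the ZNode does not exist (or holds no data).
        return record == null ? Collections.emptyMap() : record.getMapFields();
    }
}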

Example 29 with ZNRecord

use of org.apache.helix.zookeeper.datamodel.ZNRecord in project ambry by linkedin.

the class HelixHealthReportAggregatorTask method run.

@Override
public TaskResult run() {
    Pair<StatsSnapshot, StatsSnapshot> results = null;
    Exception exception = null;
    try {
        HelixDataAccessor helixDataAccessor = manager.getHelixDataAccessor();
        List<String> instanceNames = manager.getClusterManagmentTool().getInstancesInCluster(manager.getClusterName());
        Map<String, String> statsWrappersJSON = new HashMap<>();
        for (String instanceName : instanceNames) {
            PropertyKey.Builder keyBuilder = helixDataAccessor.keyBuilder();
            HelixProperty record = helixDataAccessor.getProperty(keyBuilder.healthReport(instanceName, healthReportName));
            if (record != null && record.getRecord() != null) {
                statsWrappersJSON.put(instanceName, record.getRecord().getSimpleField(statsFieldName));
            }
        }
        ObjectMapper mapper = new ObjectMapper();
        results = clusterAggregator.doWork(statsWrappersJSON, statsReportType);
        String resultId = String.format("%s%s", AGGREGATED_REPORT_PREFIX, healthReportName);
        ZNRecord znRecord = new ZNRecord(resultId);
        znRecord.setSimpleField(RAW_VALID_SIZE_FIELD_NAME, mapper.writeValueAsString(results.getFirst()));
        znRecord.setSimpleField(VALID_SIZE_FIELD_NAME, mapper.writeValueAsString(results.getSecond()));
        znRecord.setSimpleField(TIMESTAMP_FIELD_NAME, String.valueOf(time.milliseconds()));
        znRecord.setListField(ERROR_OCCURRED_INSTANCES_FIELD_NAME, clusterAggregator.getExceptionOccurredInstances(statsReportType));
        String path = String.format("/%s", resultId);
        manager.getHelixPropertyStore().set(path, znRecord, AccessOption.PERSISTENT);
        return new TaskResult(TaskResult.Status.COMPLETED, "Aggregation success");
    } catch (Exception e) {
        logger.error("Exception thrown while aggregating stats from health reports across all nodes ", e);
        exception = e;
        return new TaskResult(TaskResult.Status.FAILED, "Exception thrown");
    } finally {
        if (clusterMapConfig.clustermapEnableContainerDeletionAggregation && callback != null && results != null && statsReportType.equals(StatsReportType.ACCOUNT_REPORT)) {
            callback.onCompletion(results.getFirst(), exception);
        }
    }
}
Also used : HashMap(java.util.HashMap) HelixDataAccessor(org.apache.helix.HelixDataAccessor) HelixProperty(org.apache.helix.HelixProperty) TaskResult(org.apache.helix.task.TaskResult) PropertyKey(org.apache.helix.PropertyKey) ObjectMapper(com.fasterxml.jackson.databind.ObjectMapper) ZNRecord(org.apache.helix.zookeeper.datamodel.ZNRecord) StatsSnapshot(com.github.ambry.server.StatsSnapshot)
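
The task above persists the aggregated report as JSON strings in the ZNRecord's simple fields. Below is a minimal sketch of reading one of those fields back, assuming the caller passes the same resultId and field-name constants used in run(); their concrete values are not shown in this listing.

import java.io.IOException;
import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.ObjectMapper;
import org.apache.helix.AccessOption;
import org.apache.helix.store.HelixPropertyStore;
import org.apache.helix.zookeeper.datamodel.ZNRecord;

public class AggregatedReportReaderSketch {

    static JsonNode readAggregatedField(HelixPropertyStore<ZNRecord> store, String resultId, String fieldName)
            throws IOException {
        // Mirrors the path layout written in run(): String.format("/%s", resultId).
        ZNRecord record = store.get(String.format("/%s", resultId), null, AccessOption.PERSISTENT);
        if (record == null) {
            return null;
        }
        String json = record.getSimpleField(fieldName);
        return json == null ? null : new ObjectMapper().readTree(json);
    }
}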

Example 30 with ZNRecord

use of org.apache.helix.zookeeper.datamodel.ZNRecord in project ambry by linkedin.

the class HelixAccountServiceTest method testReadBadZNRecordCase1.

/**
 * Tests reading {@link ZNRecord} from {@link HelixPropertyStore}, where the {@link ZNRecord} is empty. This is a
 * good {@link ZNRecord} format that should NOT fail fetch or update.
 * @throws Exception Any unexpected exception.
 */
@Test
public void testReadBadZNRecordCase1() throws Exception {
    ZNRecord zNRecord = makeZNRecordWithSimpleField(null, null, null);
    updateAndWriteZNRecord(zNRecord, true);
}
Also used : ZNRecord(org.apache.helix.zookeeper.datamodel.ZNRecord) Test(org.junit.Test)
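
The makeZNRecordWithSimpleField helper is elided from this listing. A hypothetical reconstruction consistent with the (null, null, null) call above might look like the following; the real helper in HelixAccountServiceTest may differ.

import org.apache.helix.zookeeper.datamodel.ZNRecord;

public class ZNRecordTestHelperSketch {

    // Hypothetical reconstruction: builds a ZNRecord and sets a simple field only when both
    // key and value are supplied, so (null, null, null) yields an empty record.
    static ZNRecord makeZNRecordWithSimpleField(ZNRecord oldRecord, String key, String value) {
        ZNRecord record = oldRecord == null ? new ZNRecord("testZNRecord") : new ZNRecord(oldRecord);
        if (key != null && value != null) {
            record.setSimpleField(key, value);
        }
        return record;
    }
}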

Aggregations

ZNRecord (org.apache.helix.zookeeper.datamodel.ZNRecord) 37
HashMap (java.util.HashMap) 19
Test (org.junit.Test) 18
Map (java.util.Map) 10
ArrayList (java.util.ArrayList) 8
ConcurrentHashMap (java.util.concurrent.ConcurrentHashMap) 8
VerifiableProperties (com.github.ambry.config.VerifiableProperties) 6
HashSet (java.util.HashSet) 6
InstanceConfig (org.apache.helix.model.InstanceConfig) 6
MetricRegistry (com.codahale.metrics.MetricRegistry) 5
Properties (java.util.Properties) 5
Stat (org.apache.zookeeper.data.Stat) 5
ClusterMapConfig (com.github.ambry.config.ClusterMapConfig) 4
HelixPropertyStoreConfig (com.github.ambry.config.HelixPropertyStoreConfig) 4
IOException (java.io.IOException) 4
JSONObject (org.json.JSONObject) 4
List (java.util.List) 3
Random (java.util.Random) 3
PropertyKey (org.apache.helix.PropertyKey) 3
IdealState (org.apache.helix.model.IdealState) 3