Search in sources :

Example 21 with StatsSnapshot

use of com.github.ambry.server.StatsSnapshot in project ambry by linkedin.

the class HelixClusterAggregator method doWork.

/**
 * Take a {@link Map} of instance name to JSON string representation of {@link StatsWrapper} objects and perform
 * cluster wide aggregation with them.
 * <p>
 * Each JSON payload is deserialized twice so that the raw aggregation ({@code combineRaw}) and the other
 * aggregation ({@code combine}) each receive their own {@link StatsWrapper} instance (presumably because the
 * combine steps may retain or mutate the snapshot they are given - confirm against those methods).
 * <p>
 * Instances whose JSON payload is {@code null} are skipped with a warning instead of failing the whole aggregation.
 * @param statsWrappersJSON a {@link Map} of instance name to JSON string representation of {@link StatsWrapper}
 *                          objects from the node level
 * @return a {@link Pair} of JSON strings. The first element is the serialized result of reducing the raw combined
 * snapshot, the second element is the serialized result of reducing the snapshot built by {@code combine}.
 * @throws IOException if a payload cannot be deserialized or a snapshot cannot be serialized
 */
Pair<String, String> doWork(Map<String, String> statsWrappersJSON) throws IOException {
    StatsSnapshot partitionSnapshot = new StatsSnapshot(0L, new HashMap<String, StatsSnapshot>());
    Map<String, Long> partitionTimestampMap = new HashMap<>();
    StatsSnapshot rawPartitionSnapshot = new StatsSnapshot(0L, new HashMap<String, StatsSnapshot>());
    for (Map.Entry<String, String> statsWrapperJSON : statsWrappersJSON.entrySet()) {
        String instanceName = statsWrapperJSON.getKey();
        String serializedStats = statsWrapperJSON.getValue();
        // The entry itself is never null while iterating an entry set; the value is what can be missing.
        if (serializedStats == null) {
            logger.warn("Skipping instance {} because its stats report is null", instanceName);
            continue;
        }
        StatsWrapper snapshotWrapper = mapper.readValue(serializedStats, StatsWrapper.class);
        StatsWrapper snapshotWrapperCopy = mapper.readValue(serializedStats, StatsWrapper.class);
        combineRaw(rawPartitionSnapshot, snapshotWrapper);
        combine(partitionSnapshot, snapshotWrapperCopy, instanceName, partitionTimestampMap);
    }
    if (logger.isTraceEnabled()) {
        logger.trace("Combined raw snapshot {}", mapper.writeValueAsString(rawPartitionSnapshot));
        logger.trace("Combined snapshot {}", mapper.writeValueAsString(partitionSnapshot));
    }
    StatsSnapshot reducedRawSnapshot = reduce(rawPartitionSnapshot);
    StatsSnapshot reducedSnapshot = reduce(partitionSnapshot);
    // Serialize each reduced snapshot once and reuse the result for both tracing and the return value.
    String reducedRawSnapshotJSON = mapper.writeValueAsString(reducedRawSnapshot);
    String reducedSnapshotJSON = mapper.writeValueAsString(reducedSnapshot);
    if (logger.isTraceEnabled()) {
        logger.trace("Reduced raw snapshot {}", reducedRawSnapshotJSON);
        logger.trace("Reduced snapshot {}", reducedSnapshotJSON);
    }
    return new Pair<>(reducedRawSnapshotJSON, reducedSnapshotJSON);
}
Also used : HashMap(java.util.HashMap) Map(java.util.Map) StatsWrapper(com.github.ambry.server.StatsWrapper) StatsSnapshot(com.github.ambry.server.StatsSnapshot) Pair(com.github.ambry.utils.Pair)

Example 22 with StatsSnapshot

use of com.github.ambry.server.StatsSnapshot in project ambry by linkedin.

the class HelixClusterAggregator method combineValidStatsByPartitionClass.

/**
 * Merge the {@link StatsSnapshot} carried by the given {@link StatsWrapper} into the base {@link StatsSnapshot},
 * grouping by partition class. The rules are described as the same ones used by
 * {@link #combineValidStatsByAccount(StatsSnapshot, StatsWrapper, String, Map)}.
 *
 * For every partition class in the incoming snapshot:
 * 1. If the base snapshot does not know the partition class yet, the incoming partition class snapshot is stored
 *    as is, every partition below it is recorded in partitionTimestampMap with the incoming timestamp, and the
 *    partition class value is added to the total.
 * 2. Otherwise each incoming partition is examined. A partition that is not yet present under the base partition
 *    class is added, its timestamp is recorded and its value is added to both the partition class value and the
 *    total.
 * 3. For a partition that is already present, delta value = incoming value - existing value and
 *    delta time = incoming timestamp - recorded timestamp are computed, then:
 *      a) if abs(delta time) < relevantTimePeriodInMs and delta value > 0, the incoming partition replaces the
 *         existing one;
 *      b) if delta time > relevantTimePeriodInMs (the incoming partition is newer than the existing one), the
 *         incoming partition replaces the existing one;
 *      c) otherwise the incoming partition (replica) is ignored because it is either stale or not the replica with
 *         the largest value.
 *    On replacement the recorded timestamp is updated and delta value is added to the partition class value and
 *    to the total.
 * 4. Finally the partition class entry and the base snapshot are updated with the new sub maps and values.
 * The combined snapshot is represented in following format:
 * <pre>
 * {
 *   value: 1000,
 *   subMap:{
 *     PartitionClass_1: {
 *       value: 400,
 *       subMap: {
 *         Partition[1]:{
 *           value: 400,
 *           subMap: {
 *             Account[1]_Container[1]:{
 *               value: 400,
 *               subMap: null
 *             }
 *           }
 *         }
 *       }
 *     },
 *     PartitionClass_2: {
 *       value: 600,
 *       subMap:{
 *         Partition[2]:{
 *           value: 600,
 *           subMap:{
 *             Account[2]_Container[2]:{
 *               value: 600,
 *               subMap: null
 *             }
 *           }
 *         }
 *       }
 *     }
 *   }
 * }
 * </pre>
 * @param baseSnapshot the base {@link StatsSnapshot} which will contain the aggregated result
 * @param snapshotWrapper the {@link StatsWrapper} from one instance whose snapshot is merged into the base
 *                        {@link StatsSnapshot}
 * @param instance name of the instance the wrapper came from (only used for logging here)
 * @param partitionTimestampMap a {@link Map} of partition to timestamp. It keeps track of the timestamp of the
 *                              snapshot that currently backs each partition entry in the base {@link StatsSnapshot}
 */
private void combineValidStatsByPartitionClass(StatsSnapshot baseSnapshot, StatsWrapper snapshotWrapper, String instance, Map<String, Long> partitionTimestampMap) {
    Map<String, StatsSnapshot> incomingClasses = snapshotWrapper.getSnapshot().getSubMap();
    if (incomingClasses == null) {
        logger.info("There is no partition in given StatsSnapshot, skip aggregation on it.");
        return;
    }
    long grandTotal = baseSnapshot.getValue();
    long reportTimestamp = snapshotWrapper.getHeader().getTimestamp();
    Map<String, StatsSnapshot> baseClasses = baseSnapshot.getSubMap();
    for (Map.Entry<String, StatsSnapshot> classEntry : incomingClasses.entrySet()) {
        String partitionClassId = classEntry.getKey();
        StatsSnapshot incomingClass = classEntry.getValue();
        if (!baseClasses.containsKey(partitionClassId)) {
            // Unknown partition class: adopt the incoming snapshot wholesale.
            logger.trace("First entry for partitionClass {} is from {}", partitionClassId, instance);
            baseClasses.put(partitionClassId, incomingClass);
            // every partition of this class is seen for the first time, so stamp all of them
            for (String partitionIdStr : incomingClass.getSubMap().keySet()) {
                partitionTimestampMap.put(partitionIdStr, reportTimestamp);
            }
            grandTotal += incomingClass.getValue();
            continue;
        }
        // Known partition class: reconcile partition by partition.
        StatsSnapshot baseClass = baseClasses.get(partitionClassId);
        long classTotal = baseClass.getValue();
        Map<String, StatsSnapshot> basePartitions = baseClass.getSubMap();
        for (Map.Entry<String, StatsSnapshot> partitionEntry : incomingClass.getSubMap().entrySet()) {
            String partitionId = partitionEntry.getKey();
            StatsSnapshot incomingPartition = partitionEntry.getValue();
            if (!basePartitions.containsKey(partitionId)) {
                logger.trace("First partition: {} in partitionClass: {}", partitionId, partitionClassId);
                basePartitions.put(partitionId, incomingPartition);
                partitionTimestampMap.put(partitionId, reportTimestamp);
                classTotal += incomingPartition.getValue();
                grandTotal += incomingPartition.getValue();
                continue;
            }
            long deltaInValue = incomingPartition.getValue() - basePartitions.get(partitionId).getValue();
            long deltaInTimeMs = reportTimestamp - partitionTimestampMap.get(partitionId);
            // rule a): roughly the same age but reports a larger value
            boolean largerWithinWindow = Math.abs(deltaInTimeMs) < relevantTimePeriodInMs && deltaInValue > 0;
            // rule b): newer by more than the relevant time period
            boolean newerBeyondWindow = deltaInTimeMs > relevantTimePeriodInMs;
            if (largerWithinWindow || newerBeyondWindow) {
                basePartitions.put(partitionId, incomingPartition);
                partitionTimestampMap.put(partitionId, reportTimestamp);
                classTotal += deltaInValue;
                grandTotal += deltaInValue;
            } else {
                logger.trace("Ignoring snapshot from {} for partition {}", instance, partitionId);
            }
        }
        // write the reconciled partitions and value back to the partition class snapshot
        baseClass.setSubMap(basePartitions);
        baseClass.setValue(classTotal);
    }
    baseSnapshot.setValue(grandTotal);
    baseSnapshot.setSubMap(baseClasses);
}
Also used : HashMap(java.util.HashMap) Map(java.util.Map) StatsSnapshot(com.github.ambry.server.StatsSnapshot)

Example 23 with StatsSnapshot

use of com.github.ambry.server.StatsSnapshot in project ambry by linkedin.

the class HelixHealthReportAggregatorTask method run.

/**
 * Collects the health report of every instance in the cluster, aggregates them through the cluster aggregator and
 * stores the aggregated result as a {@link ZNRecord} in the Helix property store.
 * When container deletion aggregation is enabled, a callback is set, aggregation produced a result and the report
 * type is {@code ACCOUNT_REPORT}, the callback is notified with the first aggregated snapshot and the exception
 * (if any) that was caught.
 * @return a {@link TaskResult} with status {@code COMPLETED} on success or {@code FAILED} if any exception was thrown
 */
@Override
public TaskResult run() {
    Pair<StatsSnapshot, StatsSnapshot> aggregated = null;
    Exception failure = null;
    try {
        HelixDataAccessor accessor = manager.getHelixDataAccessor();
        List<String> clusterInstances = manager.getClusterManagmentTool().getInstancesInCluster(manager.getClusterName());
        Map<String, String> reportsByInstance = new HashMap<>();
        for (String clusterInstance : clusterInstances) {
            PropertyKey.Builder keys = accessor.keyBuilder();
            HelixProperty healthReport = accessor.getProperty(keys.healthReport(clusterInstance, healthReportName));
            // instances without a health report (or without a backing record) contribute nothing
            if (healthReport == null || healthReport.getRecord() == null) {
                continue;
            }
            reportsByInstance.put(clusterInstance, healthReport.getRecord().getSimpleField(statsFieldName));
        }
        ObjectMapper jsonMapper = new ObjectMapper();
        aggregated = clusterAggregator.doWork(reportsByInstance, statsReportType);
        String resultId = String.format("%s%s", AGGREGATED_REPORT_PREFIX, healthReportName);
        ZNRecord aggregatedRecord = new ZNRecord(resultId);
        aggregatedRecord.setSimpleField(RAW_VALID_SIZE_FIELD_NAME, jsonMapper.writeValueAsString(aggregated.getFirst()));
        aggregatedRecord.setSimpleField(VALID_SIZE_FIELD_NAME, jsonMapper.writeValueAsString(aggregated.getSecond()));
        aggregatedRecord.setSimpleField(TIMESTAMP_FIELD_NAME, String.valueOf(time.milliseconds()));
        aggregatedRecord.setListField(ERROR_OCCURRED_INSTANCES_FIELD_NAME, clusterAggregator.getExceptionOccurredInstances(statsReportType));
        String storePath = String.format("/%s", resultId);
        manager.getHelixPropertyStore().set(storePath, aggregatedRecord, AccessOption.PERSISTENT);
        return new TaskResult(TaskResult.Status.COMPLETED, "Aggregation success");
    } catch (Exception e) {
        logger.error("Exception thrown while aggregating stats from health reports across all nodes ", e);
        failure = e;
        return new TaskResult(TaskResult.Status.FAILED, "Exception thrown");
    } finally {
        // The callback only fires when aggregation got far enough to produce a result.
        boolean shouldNotify = clusterMapConfig.clustermapEnableContainerDeletionAggregation && callback != null && aggregated != null && statsReportType.equals(StatsReportType.ACCOUNT_REPORT);
        if (shouldNotify) {
            callback.onCompletion(aggregated.getFirst(), failure);
        }
    }
}
Also used : HashMap(java.util.HashMap) HelixDataAccessor(org.apache.helix.HelixDataAccessor) HelixProperty(org.apache.helix.HelixProperty) TaskResult(org.apache.helix.task.TaskResult) PropertyKey(org.apache.helix.PropertyKey) ObjectMapper(com.fasterxml.jackson.databind.ObjectMapper) ZNRecord(org.apache.helix.zookeeper.datamodel.ZNRecord) StatsSnapshot(com.github.ambry.server.StatsSnapshot)

Example 24 with StatsSnapshot

use of com.github.ambry.server.StatsSnapshot in project ambry by linkedin.

the class MySqlReportAggregatorTask method run.

/**
 * Fetches the storage stats of every instance in the cluster, aggregates them according to the configured report
 * type and persists the aggregated stats in the account stats store. For account reports it can additionally take a
 * monthly snapshot of the aggregated stats when the recorded month differs from the current one.
 * When container deletion aggregation is enabled, a callback is set, an account snapshot was produced and the report
 * type is {@code ACCOUNT_REPORT}, the callback is notified with that snapshot and the exception (if any) that was
 * caught.
 * @return a {@link TaskResult} with status {@code COMPLETED} on success or {@code FAILED} if any exception was thrown
 */
@Override
public TaskResult run() {
    Exception failure = null;
    // pick the histograms matching the report type being processed
    Histogram fetchTimeMs;
    if (statsReportType == StatsReportType.ACCOUNT_REPORT) {
        fetchTimeMs = metrics.accountStatsFetchTimeMs;
    } else {
        fetchTimeMs = metrics.partitionClassStatsFetchTimeMs;
    }
    Histogram aggregationTimeMs;
    if (statsReportType == StatsReportType.ACCOUNT_REPORT) {
        aggregationTimeMs = metrics.accountStatsAggregationTimeMs;
    } else {
        aggregationTimeMs = metrics.partitionClassStatsAggregationTimeMs;
    }
    long startTimeMs = System.currentTimeMillis();
    StatsSnapshot accountPhysicalStorageSnapshot = null;
    try {
        List<String> clusterInstances = manager.getClusterManagmentTool().getInstancesInCluster(manager.getClusterName());
        if (statsReportType == StatsReportType.ACCOUNT_REPORT) {
            Map<String, HostAccountStorageStatsWrapper> hostAccountStats = fetchAccountStorageStatsWrapperForInstances(clusterInstances);
            fetchTimeMs.update(System.currentTimeMillis() - startTimeMs);
            logger.info("Aggregating stats from " + hostAccountStats.size() + " hosts");
            Pair<AggregatedAccountStorageStats, AggregatedAccountStorageStats> aggregated = clusterAggregator.aggregateHostAccountStorageStatsWrappers(hostAccountStats);
            AggregatedAccountStorageStats secondOfPair = aggregated.getSecond();
            if (clusterMapConfig.clustermapEnableDeleteInvalidDataInMysqlAggregationTask) {
                removeInvalidAggregatedAccountAndContainerStats(secondOfPair);
            }
            accountStatsStore.storeAggregatedAccountStorageStats(aggregated.getSecond());
            // the first element of the pair is what gets handed to the callback later on
            accountPhysicalStorageSnapshot = StorageStatsUtil.convertAggregatedAccountStorageStatsToStatsSnapshot(aggregated.getFirst(), true);
        } else if (statsReportType == StatsReportType.PARTITION_CLASS_REPORT) {
            Map<String, HostPartitionClassStorageStatsWrapper> hostPartitionClassStats = fetchPartitionClassStorageStatsWrapperForInstances(clusterInstances);
            fetchTimeMs.update(System.currentTimeMillis() - startTimeMs);
            logger.info("Aggregating stats from " + hostPartitionClassStats.size() + " hosts");
            Pair<AggregatedPartitionClassStorageStats, AggregatedPartitionClassStorageStats> aggregated = clusterAggregator.aggregateHostPartitionClassStorageStatsWrappers(hostPartitionClassStats);
            AggregatedPartitionClassStorageStats secondOfPair = aggregated.getSecond();
            if (clusterMapConfig.clustermapEnableDeleteInvalidDataInMysqlAggregationTask) {
                removeInvalidAggregatedPartitionClassStats(secondOfPair);
            }
            accountStatsStore.storeAggregatedPartitionClassStorageStats(aggregated.getSecond());
        }
        // Check if there is a base report for this month or not.
        boolean monthlyReportApplies = clusterMapConfig.clustermapEnableAggregatedMonthlyAccountReport && statsReportType == StatsReportType.ACCOUNT_REPORT;
        if (monthlyReportApplies) {
            // Compare the current month with the recorded one; on mismatch take a snapshot and record the new month.
            String currentMonthValue = LocalDateTime.ofEpochSecond(time.seconds(), 0, ZONE_OFFSET).format(TIMESTAMP_FORMATTER);
            String recordedMonthValue = accountStatsStore.queryRecordedMonth();
            boolean monthChanged = recordedMonthValue == null || recordedMonthValue.isEmpty() || !currentMonthValue.equals(recordedMonthValue);
            if (monthChanged) {
                if (clusterMapConfig.clustermapEnableDeleteInvalidDataInMysqlAggregationTask) {
                    accountStatsStore.deleteSnapshotOfAggregatedAccountStats();
                }
                logger.info("Taking snapshot of aggregated stats for month " + currentMonthValue);
                accountStatsStore.takeSnapshotOfAggregatedAccountStatsAndUpdateMonth(currentMonthValue);
            }
        }
        aggregationTimeMs.update(System.currentTimeMillis() - startTimeMs);
        return new TaskResult(TaskResult.Status.COMPLETED, "Aggregation success");
    } catch (Exception e) {
        logger.error("Exception thrown while aggregating stats from container stats reports across all nodes ", e);
        failure = e;
        return new TaskResult(TaskResult.Status.FAILED, "Exception thrown");
    } finally {
        // The callback only fires when an account snapshot was actually produced.
        boolean shouldNotify = clusterMapConfig.clustermapEnableContainerDeletionAggregation && callback != null && accountPhysicalStorageSnapshot != null && statsReportType.equals(StatsReportType.ACCOUNT_REPORT);
        if (shouldNotify) {
            callback.onCompletion(accountPhysicalStorageSnapshot, failure);
        }
    }
}
Also used : Histogram(com.codahale.metrics.Histogram) HostAccountStorageStatsWrapper(com.github.ambry.server.HostAccountStorageStatsWrapper) AggregatedAccountStorageStats(com.github.ambry.server.storagestats.AggregatedAccountStorageStats) TaskResult(org.apache.helix.task.TaskResult) HashMap(java.util.HashMap) Map(java.util.Map) StatsSnapshot(com.github.ambry.server.StatsSnapshot) Pair(com.github.ambry.utils.Pair)

Example 25 with StatsSnapshot

use of com.github.ambry.server.StatsSnapshot in project ambry by linkedin.

the class HelixClusterAggregatorTest method testDoWorkWithDiffNumberOfStores.

/**
 * Verifies partitionClass aggregation when two nodes report a different number of stores: the first node reports
 * two store snapshots and the second node reports three, all generated from the same seed.
 * The raw result must equal the reduced sum of both node snapshots, and the valid result must equal the reduced
 * snapshot of the second node alone.
 * Only used for partitionClass aggregation testing.
 * @throws IOException
 */
@Test
public void testDoWorkWithDiffNumberOfStores() throws IOException {
    final StatsReportType reportType = StatsReportType.PARTITION_CLASS_REPORT;
    final int seed = 1111;
    List<StatsSnapshot> twoStores = new ArrayList<>();
    List<StatsSnapshot> threeStores = new ArrayList<>();
    // independent copy of the three-store list, used only to compute the expected raw result
    // (presumably because aggregation may modify the snapshots it is given)
    List<StatsSnapshot> threeStoresCopy = new ArrayList<>();
    // twoStores only has 2 store stats. threeStores and threeStoresCopy have 3 store stats each.
    for (int storeIndex = 3; storeIndex < 6; storeIndex++) {
        if (storeIndex < 5) {
            twoStores.add(TestUtils.generateStoreStats(storeIndex, 3, new Random(seed), reportType));
        }
        threeStores.add(TestUtils.generateStoreStats(storeIndex, 3, new Random(seed), reportType));
        threeStoresCopy.add(TestUtils.generateStoreStats(storeIndex, 3, new Random(seed), reportType));
    }
    StatsWrapper firstNodeStats = TestUtils.generateNodeStats(twoStores, DEFAULT_TIMESTAMP, reportType);
    StatsWrapper secondNodeStats = TestUtils.generateNodeStats(threeStores, DEFAULT_TIMESTAMP, reportType);
    StatsWrapper secondNodeStatsCopy = TestUtils.generateNodeStats(threeStoresCopy, DEFAULT_TIMESTAMP, reportType);
    Map<String, String> serializedStatsByInstance = new LinkedHashMap<>();
    serializedStatsByInstance.put("Instance_1", mapper.writeValueAsString(firstNodeStats));
    serializedStatsByInstance.put("Instance_2", mapper.writeValueAsString(secondNodeStats));
    Pair<StatsSnapshot, StatsSnapshot> rawAndValid = clusterAggregator.doWork(serializedStatsByInstance, reportType);

    // verify aggregation on raw data
    StatsSnapshot expectedRaw = new StatsSnapshot(0L, null);
    StatsSnapshot.aggregate(expectedRaw, firstNodeStats.getSnapshot());
    StatsSnapshot.aggregate(expectedRaw, secondNodeStatsCopy.getSnapshot());
    expectedRaw = HelixClusterAggregator.reduceByPartitionClass(expectedRaw);
    // round-trip through JSON before comparing
    String actualRawJson = mapper.writeValueAsString(rawAndValid.getFirst());
    StatsSnapshot actualRaw = mapper.readValue(actualRawJson, StatsSnapshot.class);
    assertTrue("Mismatch in the raw data aggregated snapshot", expectedRaw.equals(actualRaw));

    // verify aggregation on valid data
    StatsSnapshot expectedValid = HelixClusterAggregator.reduceByPartitionClass(secondNodeStats.getSnapshot());
    String actualValidJson = mapper.writeValueAsString(rawAndValid.getSecond());
    StatsSnapshot actualValid = mapper.readValue(actualValidJson, StatsSnapshot.class);
    assertTrue("Mismatch in the valid data aggregated snapshot", expectedValid.equals(actualValid));
}
Also used : Random(java.util.Random) ArrayList(java.util.ArrayList) StatsWrapper(com.github.ambry.server.StatsWrapper) StatsSnapshot(com.github.ambry.server.StatsSnapshot) LinkedHashMap(java.util.LinkedHashMap) Test(org.junit.Test)

Aggregations

StatsSnapshot (com.github.ambry.server.StatsSnapshot)35 HashMap (java.util.HashMap)26 Map (java.util.Map)18 Test (org.junit.Test)18 StatsWrapper (com.github.ambry.server.StatsWrapper)14 Random (java.util.Random)10 ArrayList (java.util.ArrayList)9 StatsReportType (com.github.ambry.server.StatsReportType)6 StorageStatsUtilTest (com.github.ambry.server.StorageStatsUtilTest)6 LinkedHashMap (java.util.LinkedHashMap)6 StatsHeader (com.github.ambry.server.StatsHeader)5 VerifiableProperties (com.github.ambry.config.VerifiableProperties)4 HostAccountStorageStatsWrapper (com.github.ambry.server.HostAccountStorageStatsWrapper)4 Pair (com.github.ambry.utils.Pair)4 HashSet (java.util.HashSet)4 ObjectMapper (com.fasterxml.jackson.databind.ObjectMapper)3 AccountStatsMySqlStore (com.github.ambry.accountstats.AccountStatsMySqlStore)3 StorageQuotaConfig (com.github.ambry.config.StorageQuotaConfig)3 HostPartitionClassStorageStatsWrapper (com.github.ambry.server.HostPartitionClassStorageStatsWrapper)3 AggregatedAccountStorageStats (com.github.ambry.server.storagestats.AggregatedAccountStorageStats)3