use of com.github.ambry.server.StatsSnapshot in project ambry by linkedin.
the class HelixClusterAggregator method doWork.
/**
 * Take a {@link Map} of instance name to JSON string representation of {@link StatsWrapper} objects and perform
 * cluster-wide aggregation with them.
 * @param statsWrappersJSON a {@link Map} of instance name to JSON string representation of {@link StatsWrapper}
 *                          objects from the node level
 * @return a {@link Pair} of Strings whose values represent valid quota stats across all partitions.
 *         The first element is the raw (summed) aggregated stats and the second element is the averaged (aggregated)
 *         stats across all replicas of each partition.
 * @throws IOException if an error occurs while serializing or deserializing JSON
 */
Pair<String, String> doWork(Map<String, String> statsWrappersJSON) throws IOException {
  StatsSnapshot partitionSnapshot = new StatsSnapshot(0L, new HashMap<String, StatsSnapshot>());
  Map<String, Long> partitionTimestampMap = new HashMap<>();
  StatsSnapshot rawPartitionSnapshot = new StatsSnapshot(0L, new HashMap<String, StatsSnapshot>());
  for (Map.Entry<String, String> statsWrapperJSON : statsWrappersJSON.entrySet()) {
    // entrySet() never yields null entries; guard against a missing/null JSON value instead.
    if (statsWrapperJSON.getValue() != null) {
      // Deserialize twice to get two independent copies: one is mutated by the raw (sum) aggregation,
      // the other by the valid-stats aggregation.
      StatsWrapper snapshotWrapper = mapper.readValue(statsWrapperJSON.getValue(), StatsWrapper.class);
      StatsWrapper snapshotWrapperCopy = mapper.readValue(statsWrapperJSON.getValue(), StatsWrapper.class);
      combineRaw(rawPartitionSnapshot, snapshotWrapper);
      combine(partitionSnapshot, snapshotWrapperCopy, statsWrapperJSON.getKey(), partitionTimestampMap);
    }
  }
  if (logger.isTraceEnabled()) {
    logger.trace("Combined raw snapshot {}", mapper.writeValueAsString(rawPartitionSnapshot));
    logger.trace("Combined snapshot {}", mapper.writeValueAsString(partitionSnapshot));
  }
  StatsSnapshot reducedRawSnapshot = reduce(rawPartitionSnapshot);
  StatsSnapshot reducedSnapshot = reduce(partitionSnapshot);
  if (logger.isTraceEnabled()) {
    logger.trace("Reduced raw snapshot {}", mapper.writeValueAsString(reducedRawSnapshot));
    logger.trace("Reduced snapshot {}", mapper.writeValueAsString(reducedSnapshot));
  }
  return new Pair<>(mapper.writeValueAsString(reducedRawSnapshot), mapper.writeValueAsString(reducedSnapshot));
}
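The sketch below shows how a caller might feed per-instance StatsWrapper JSON into doWork(). It is not taken from the ambry codebase: the HelixClusterAggregator constructor argument (a relevant time period in milliseconds) and the instance1StatsWrapperJSON / instance2StatsWrapperJSON variables are assumptions for illustration; only doWork() and the Pair accessors come from the snippet above.

// Hypothetical caller of doWork(); constructor argument and instance names are assumptions, not ambry code.
Map<String, String> statsWrappersJSON = new HashMap<>();
statsWrappersJSON.put("Instance_1", instance1StatsWrapperJSON); // JSON-serialized StatsWrapper reported by node 1
statsWrappersJSON.put("Instance_2", instance2StatsWrapperJSON); // JSON-serialized StatsWrapper reported by node 2
HelixClusterAggregator aggregator = new HelixClusterAggregator(10 * 60 * 1000L); // assumed relevant time period: 10 minutes
Pair<String, String> aggregated = aggregator.doWork(statsWrappersJSON);
String rawAggregatedJSON = aggregated.getFirst();    // raw (summed) aggregate over every replica
String validAggregatedJSON = aggregated.getSecond(); // valid aggregate after per-partition reconciliation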
use of com.github.ambry.server.StatsSnapshot in project ambry by linkedin.
the class HelixClusterAggregator method combineValidStatsByPartitionClass.
/**
 * Aggregate the given {@link StatsSnapshot} with the base {@link StatsSnapshot} by partition class. The aggregation uses
 * the same rules as {@link #combineValidStatsByAccount(StatsSnapshot, StatsWrapper, String, Map)}.
 *
 * The workflow of this method is as follows:
 * 1. Check if basePartitionClassMap contains the given partitionClass. If yes, go to step 2; if not, put it directly into
 *    basePartitionClassMap and update partitionTimestampMap by adding all <partition, timestamp> pairs associated with the
 *    given partitionClass.
 * 2. For each partition in the given partitionClass, check if basePartitionMap contains it. If yes, go to step 3;
 *    if not, put the partition into basePartitionMap and update partitionTimestampMap with the partition and its timestamp.
 * 3. Compute the delta value and delta time between the given partition and the existing one. Update partitionClassVal and
 *    partitionTimestampMap based on the following rules (a standalone sketch of this rule appears after this method):
 *    a) if abs(delta time) is within relevantTimePeriodInMs and delta value > 0, replace the existing partition with the given one.
 *    b) if delta time > relevantTimePeriodInMs, meaning the given partition is newer than the existing partition,
 *       replace the existing partition with the given one.
 *    c) otherwise, ignore the partition (replica) because it is either stale or not the replica with the largest value.
 * 4. Update basePartitionClassMap with the up-to-date basePartitionMap and totalValueOfAllClasses.
 * The combined snapshot is represented in the following format:
 * <pre>
 * {
 *   value: 1000,
 *   subMap: {
 *     PartitionClass_1: {
 *       value: 400,
 *       subMap: {
 *         Partition[1]: {
 *           value: 400,
 *           subMap: {
 *             Account[1]_Container[1]: {
 *               value: 400,
 *               subMap: null
 *             }
 *           }
 *         }
 *       }
 *     },
 *     PartitionClass_2: {
 *       value: 600,
 *       subMap: {
 *         Partition[2]: {
 *           value: 600,
 *           subMap: {
 *             Account[2]_Container[2]: {
 *               value: 600,
 *               subMap: null
 *             }
 *           }
 *         }
 *       }
 *     }
 *   }
 * }
 * </pre>
 * @param baseSnapshot the base {@link StatsSnapshot} which will contain the aggregated result
 * @param snapshotWrapper the {@link StatsWrapper} from the given instance whose snapshot is aggregated into the base {@link StatsSnapshot}
 * @param instance the instance from which the snapshot being combined originates
 * @param partitionTimestampMap a {@link Map} of partition to timestamp. It keeps track of the current timestamp of each
 *                              partition entry in the base {@link StatsSnapshot}
 */
private void combineValidStatsByPartitionClass(StatsSnapshot baseSnapshot, StatsWrapper snapshotWrapper, String instance,
    Map<String, Long> partitionTimestampMap) {
  Map<String, StatsSnapshot> partitionClassSnapshotMap = snapshotWrapper.getSnapshot().getSubMap();
  if (partitionClassSnapshotMap == null) {
    logger.info("There is no partition class in the given StatsSnapshot, skipping aggregation on it.");
    return;
  }
  long totalValueOfAllClasses = baseSnapshot.getValue();
  long snapshotTimestamp = snapshotWrapper.getHeader().getTimestamp();
  Map<String, StatsSnapshot> basePartitionClassMap = baseSnapshot.getSubMap();
  for (Map.Entry<String, StatsSnapshot> partitionClassSnapshot : partitionClassSnapshotMap.entrySet()) {
    String partitionClassId = partitionClassSnapshot.getKey();
    if (basePartitionClassMap.containsKey(partitionClassId)) {
      long partitionClassVal = basePartitionClassMap.get(partitionClassId).getValue();
      Map<String, StatsSnapshot> basePartitionMap = basePartitionClassMap.get(partitionClassId).getSubMap();
      for (Map.Entry<String, StatsSnapshot> partitionToSnapshot : partitionClassSnapshot.getValue().getSubMap().entrySet()) {
        String partitionId = partitionToSnapshot.getKey();
        StatsSnapshot partitionSnapshot = partitionToSnapshot.getValue();
        if (basePartitionMap.containsKey(partitionId)) {
          long deltaInValue = partitionSnapshot.getValue() - basePartitionMap.get(partitionId).getValue();
          long deltaInTimeMs = snapshotTimestamp - partitionTimestampMap.get(partitionId);
          if (Math.abs(deltaInTimeMs) < relevantTimePeriodInMs && deltaInValue > 0) {
            // Rule (a): reports are within the relevant time window; prefer the replica with the larger value.
            basePartitionMap.put(partitionId, partitionSnapshot);
            partitionTimestampMap.put(partitionId, snapshotTimestamp);
            partitionClassVal += deltaInValue;
            totalValueOfAllClasses += deltaInValue;
          } else if (deltaInTimeMs > relevantTimePeriodInMs) {
            // Rule (b): the incoming replica's report is significantly newer, so it replaces the existing one.
            basePartitionMap.put(partitionId, partitionSnapshot);
            partitionTimestampMap.put(partitionId, snapshotTimestamp);
            partitionClassVal += deltaInValue;
            totalValueOfAllClasses += deltaInValue;
          } else {
            // Rule (c): the incoming replica is stale or not the largest; ignore it.
            logger.trace("Ignoring snapshot from {} for partition {}", instance, partitionId);
          }
        } else {
          logger.trace("First partition: {} in partitionClass: {}", partitionId, partitionClassId);
          basePartitionMap.put(partitionId, partitionSnapshot);
          partitionTimestampMap.put(partitionId, snapshotTimestamp);
          partitionClassVal += partitionSnapshot.getValue();
          totalValueOfAllClasses += partitionSnapshot.getValue();
        }
      }
      // update partitionClass snapshot
      basePartitionClassMap.get(partitionClassId).setSubMap(basePartitionMap);
      basePartitionClassMap.get(partitionClassId).setValue(partitionClassVal);
    } else {
      logger.trace("First entry for partitionClass {} is from {}", partitionClassId, instance);
      basePartitionClassMap.put(partitionClassId, partitionClassSnapshot.getValue());
      // put all partitions associated with this partitionClass into partitionTimestampMap on their first occurrence.
      for (String partitionIdStr : partitionClassSnapshot.getValue().getSubMap().keySet()) {
        partitionTimestampMap.put(partitionIdStr, snapshotTimestamp);
      }
      // add aggregated value in this partition class to totalValue
      totalValueOfAllClasses += partitionClassSnapshot.getValue().getValue();
    }
  }
  baseSnapshot.setValue(totalValueOfAllClasses);
  baseSnapshot.setSubMap(basePartitionClassMap);
}
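The replacement rules in step 3 of the javadoc can be illustrated in isolation. The helper below is a hypothetical standalone sketch, not part of ambry; it only restates the decision the method above makes when a partition is already present in the base map.

// Hypothetical helper for illustration only: decides whether an incoming replica's snapshot should replace the
// replica currently recorded for the same partition, following rules (a)-(c) from the javadoc above.
static boolean shouldReplaceExistingReplica(long existingValue, long existingTimestampMs, long incomingValue,
    long incomingTimestampMs, long relevantTimePeriodInMs) {
  long deltaInValue = incomingValue - existingValue;
  long deltaInTimeMs = incomingTimestampMs - existingTimestampMs;
  if (Math.abs(deltaInTimeMs) < relevantTimePeriodInMs && deltaInValue > 0) {
    // Rule (a): reports are close enough in time; keep whichever replica reports the larger value.
    return true;
  }
  if (deltaInTimeMs > relevantTimePeriodInMs) {
    // Rule (b): the incoming report is much newer than the recorded one; take the newer data.
    return true;
  }
  // Rule (c): stale or smaller; keep the existing replica.
  return false;
}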
use of com.github.ambry.server.StatsSnapshot in project ambry by linkedin.
the class HelixHealthReportAggregatorTask method run.
@Override
public TaskResult run() {
  Pair<StatsSnapshot, StatsSnapshot> results = null;
  Exception exception = null;
  try {
    HelixDataAccessor helixDataAccessor = manager.getHelixDataAccessor();
    List<String> instanceNames = manager.getClusterManagmentTool().getInstancesInCluster(manager.getClusterName());
    Map<String, String> statsWrappersJSON = new HashMap<>();
    for (String instanceName : instanceNames) {
      PropertyKey.Builder keyBuilder = helixDataAccessor.keyBuilder();
      HelixProperty record = helixDataAccessor.getProperty(keyBuilder.healthReport(instanceName, healthReportName));
      if (record != null && record.getRecord() != null) {
        statsWrappersJSON.put(instanceName, record.getRecord().getSimpleField(statsFieldName));
      }
    }
    ObjectMapper mapper = new ObjectMapper();
    results = clusterAggregator.doWork(statsWrappersJSON, statsReportType);
    String resultId = String.format("%s%s", AGGREGATED_REPORT_PREFIX, healthReportName);
    ZNRecord znRecord = new ZNRecord(resultId);
    znRecord.setSimpleField(RAW_VALID_SIZE_FIELD_NAME, mapper.writeValueAsString(results.getFirst()));
    znRecord.setSimpleField(VALID_SIZE_FIELD_NAME, mapper.writeValueAsString(results.getSecond()));
    znRecord.setSimpleField(TIMESTAMP_FIELD_NAME, String.valueOf(time.milliseconds()));
    znRecord.setListField(ERROR_OCCURRED_INSTANCES_FIELD_NAME, clusterAggregator.getExceptionOccurredInstances(statsReportType));
    String path = String.format("/%s", resultId);
    manager.getHelixPropertyStore().set(path, znRecord, AccessOption.PERSISTENT);
    return new TaskResult(TaskResult.Status.COMPLETED, "Aggregation success");
  } catch (Exception e) {
    logger.error("Exception thrown while aggregating stats from health reports across all nodes", e);
    exception = e;
    return new TaskResult(TaskResult.Status.FAILED, "Exception thrown");
  } finally {
    if (clusterMapConfig.clustermapEnableContainerDeletionAggregation && callback != null && results != null
        && statsReportType.equals(StatsReportType.ACCOUNT_REPORT)) {
      callback.onCompletion(results.getFirst(), exception);
    }
  }
}
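For context, a consumer of this aggregated report could read the ZNRecord back from the Helix property store and deserialize the stored snapshots. The sketch below is hypothetical, not ambry code; it assumes access to the same HelixManager and to the field-name constants used by the writer above (AGGREGATED_REPORT_PREFIX, VALID_SIZE_FIELD_NAME, TIMESTAMP_FIELD_NAME).

// Hypothetical reader of the aggregated report written by the task above; constants are assumed to match the writer's.
StatsSnapshot readAggregatedValidSnapshot(HelixManager manager, String healthReportName) throws IOException {
  String resultId = String.format("%s%s", AGGREGATED_REPORT_PREFIX, healthReportName);
  String path = String.format("/%s", resultId);
  // Read back the ZNRecord that the aggregator task persisted.
  ZNRecord record = manager.getHelixPropertyStore().get(path, null, AccessOption.PERSISTENT);
  if (record == null) {
    return null;
  }
  ObjectMapper mapper = new ObjectMapper();
  // The simple fields hold JSON-serialized StatsSnapshot objects, mirroring what the task wrote.
  return mapper.readValue(record.getSimpleField(VALID_SIZE_FIELD_NAME), StatsSnapshot.class);
}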
use of com.github.ambry.server.StatsSnapshot in project ambry by linkedin.
the class MySqlReportAggregatorTask method run.
@Override
public TaskResult run() {
  Exception exception = null;
  Histogram fetchTimeMs =
      statsReportType == StatsReportType.ACCOUNT_REPORT ? metrics.accountStatsFetchTimeMs : metrics.partitionClassStatsFetchTimeMs;
  Histogram aggregationTimeMs =
      statsReportType == StatsReportType.ACCOUNT_REPORT ? metrics.accountStatsAggregationTimeMs : metrics.partitionClassStatsAggregationTimeMs;
  long startTimeMs = System.currentTimeMillis();
  StatsSnapshot accountPhysicalStorageSnapshot = null;
  try {
    List<String> instanceNames = manager.getClusterManagmentTool().getInstancesInCluster(manager.getClusterName());
    if (statsReportType == StatsReportType.ACCOUNT_REPORT) {
      Map<String, HostAccountStorageStatsWrapper> accountStatsWrappers =
          fetchAccountStorageStatsWrapperForInstances(instanceNames);
      fetchTimeMs.update(System.currentTimeMillis() - startTimeMs);
      logger.info("Aggregating stats from " + accountStatsWrappers.size() + " hosts");
      Pair<AggregatedAccountStorageStats, AggregatedAccountStorageStats> results =
          clusterAggregator.aggregateHostAccountStorageStatsWrappers(accountStatsWrappers);
      if (clusterMapConfig.clustermapEnableDeleteInvalidDataInMysqlAggregationTask) {
        removeInvalidAggregatedAccountAndContainerStats(results.getSecond());
      }
      accountStatsStore.storeAggregatedAccountStorageStats(results.getSecond());
      accountPhysicalStorageSnapshot =
          StorageStatsUtil.convertAggregatedAccountStorageStatsToStatsSnapshot(results.getFirst(), true);
    } else if (statsReportType == StatsReportType.PARTITION_CLASS_REPORT) {
      Map<String, HostPartitionClassStorageStatsWrapper> statsWrappers =
          fetchPartitionClassStorageStatsWrapperForInstances(instanceNames);
      fetchTimeMs.update(System.currentTimeMillis() - startTimeMs);
      logger.info("Aggregating stats from " + statsWrappers.size() + " hosts");
      Pair<AggregatedPartitionClassStorageStats, AggregatedPartitionClassStorageStats> results =
          clusterAggregator.aggregateHostPartitionClassStorageStatsWrappers(statsWrappers);
      if (clusterMapConfig.clustermapEnableDeleteInvalidDataInMysqlAggregationTask) {
        removeInvalidAggregatedPartitionClassStats(results.getSecond());
      }
      accountStatsStore.storeAggregatedPartitionClassStorageStats(results.getSecond());
    }
    // Check if there is a base report for this month or not.
    if (clusterMapConfig.clustermapEnableAggregatedMonthlyAccountReport && statsReportType == StatsReportType.ACCOUNT_REPORT) {
      // Get the month; if not the same month, then copy the aggregated stats and update the month
      String currentMonthValue = LocalDateTime.ofEpochSecond(time.seconds(), 0, ZONE_OFFSET).format(TIMESTAMP_FORMATTER);
      String recordedMonthValue = accountStatsStore.queryRecordedMonth();
      if (recordedMonthValue == null || recordedMonthValue.isEmpty() || !currentMonthValue.equals(recordedMonthValue)) {
        if (clusterMapConfig.clustermapEnableDeleteInvalidDataInMysqlAggregationTask) {
          accountStatsStore.deleteSnapshotOfAggregatedAccountStats();
        }
        logger.info("Taking snapshot of aggregated stats for month " + currentMonthValue);
        accountStatsStore.takeSnapshotOfAggregatedAccountStatsAndUpdateMonth(currentMonthValue);
      }
    }
    aggregationTimeMs.update(System.currentTimeMillis() - startTimeMs);
    return new TaskResult(TaskResult.Status.COMPLETED, "Aggregation success");
  } catch (Exception e) {
    logger.error("Exception thrown while aggregating stats from container stats reports across all nodes", e);
    exception = e;
    return new TaskResult(TaskResult.Status.FAILED, "Exception thrown");
  } finally {
    if (clusterMapConfig.clustermapEnableContainerDeletionAggregation && callback != null
        && accountPhysicalStorageSnapshot != null && statsReportType.equals(StatsReportType.ACCOUNT_REPORT)) {
      callback.onCompletion(accountPhysicalStorageSnapshot, exception);
    }
  }
}
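The monthly-snapshot check above compares a formatted month string against the month recorded in the stats store. The sketch below recreates just that computation; it is illustrative only, since the real ZONE_OFFSET and TIMESTAMP_FORMATTER constants are not shown in this snippet, so a UTC offset and a "yyyy-MM" pattern are assumed here.

// Hypothetical recreation of the month-key computation used above; ZONE_OFFSET and TIMESTAMP_FORMATTER are assumptions.
import java.time.LocalDateTime;
import java.time.ZoneOffset;
import java.time.format.DateTimeFormatter;

class MonthKeyExample {
  private static final ZoneOffset ZONE_OFFSET = ZoneOffset.UTC;                                          // assumption
  private static final DateTimeFormatter TIMESTAMP_FORMATTER = DateTimeFormatter.ofPattern("yyyy-MM");   // assumption

  static String currentMonthKey(long epochSeconds) {
    // e.g. epochSeconds = 1609459200 (2021-01-01T00:00:00Z) -> "2021-01"
    return LocalDateTime.ofEpochSecond(epochSeconds, 0, ZONE_OFFSET).format(TIMESTAMP_FORMATTER);
  }

  static boolean shouldTakeMonthlySnapshot(String currentMonthValue, String recordedMonthValue) {
    // Mirrors the condition in the task: snapshot if no month is recorded yet or the month has changed.
    return recordedMonthValue == null || recordedMonthValue.isEmpty() || !currentMonthValue.equals(recordedMonthValue);
  }
}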
use of com.github.ambry.server.StatsSnapshot in project ambry by linkedin.
the class HelixClusterAggregatorTest method testDoWorkWithDiffNumberOfStores.
/**
 * Test stats aggregation with different numbers of stores on different nodes.
 * Only used for partitionClass aggregation testing.
 * @throws IOException if JSON (de)serialization fails
 */
@Test
public void testDoWorkWithDiffNumberOfStores() throws IOException {
  List<StatsSnapshot> storeSnapshots1 = new ArrayList<>();
  List<StatsSnapshot> storeSnapshots2 = new ArrayList<>();
  List<StatsSnapshot> storeSnapshots2Copy = new ArrayList<>();
  int seed = 1111;
  // storeSnapshots1 only has 2 store stats. storeSnapshots2 and storeSnapshots2Copy have 3 store stats each.
  for (int i = 3; i < 6; i++) {
    if (i < 5) {
      storeSnapshots1.add(TestUtils.generateStoreStats(i, 3, new Random(seed), StatsReportType.PARTITION_CLASS_REPORT));
    }
    storeSnapshots2.add(TestUtils.generateStoreStats(i, 3, new Random(seed), StatsReportType.PARTITION_CLASS_REPORT));
    storeSnapshots2Copy.add(TestUtils.generateStoreStats(i, 3, new Random(seed), StatsReportType.PARTITION_CLASS_REPORT));
  }
  StatsWrapper nodeStatsWrapper1 =
      TestUtils.generateNodeStats(storeSnapshots1, DEFAULT_TIMESTAMP, StatsReportType.PARTITION_CLASS_REPORT);
  StatsWrapper nodeStatsWrapper2 =
      TestUtils.generateNodeStats(storeSnapshots2, DEFAULT_TIMESTAMP, StatsReportType.PARTITION_CLASS_REPORT);
  StatsWrapper nodeStatsWrapper2Copy =
      TestUtils.generateNodeStats(storeSnapshots2Copy, DEFAULT_TIMESTAMP, StatsReportType.PARTITION_CLASS_REPORT);
  Map<String, String> instanceStatsMap = new LinkedHashMap<>();
  instanceStatsMap.put("Instance_1", mapper.writeValueAsString(nodeStatsWrapper1));
  instanceStatsMap.put("Instance_2", mapper.writeValueAsString(nodeStatsWrapper2));
  Pair<StatsSnapshot, StatsSnapshot> aggregatedRawAndValidStats =
      clusterAggregator.doWork(instanceStatsMap, StatsReportType.PARTITION_CLASS_REPORT);
  // verify aggregation on raw data
  StatsSnapshot expectedRawSnapshot = new StatsSnapshot(0L, null);
  StatsSnapshot.aggregate(expectedRawSnapshot, nodeStatsWrapper1.getSnapshot());
  StatsSnapshot.aggregate(expectedRawSnapshot, nodeStatsWrapper2Copy.getSnapshot());
  expectedRawSnapshot = HelixClusterAggregator.reduceByPartitionClass(expectedRawSnapshot);
  StatsSnapshot rawSnapshot =
      mapper.readValue(mapper.writeValueAsString(aggregatedRawAndValidStats.getFirst()), StatsSnapshot.class);
  assertTrue("Mismatch in the raw data aggregated snapshot", expectedRawSnapshot.equals(rawSnapshot));
  // verify aggregation on valid data
  StatsSnapshot expectedValidSnapshot = HelixClusterAggregator.reduceByPartitionClass(nodeStatsWrapper2.getSnapshot());
  StatsSnapshot validSnapshot =
      mapper.readValue(mapper.writeValueAsString(aggregatedRawAndValidStats.getSecond()), StatsSnapshot.class);
  assertTrue("Mismatch in the valid data aggregated snapshot", expectedValidSnapshot.equals(validSnapshot));
}
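The raw-data expectation in this test relies on StatsSnapshot.aggregate(), which sums the values of the added snapshot into the base snapshot. The sketch below shows that summing behavior on two tiny snapshots, using only the StatsSnapshot constructor and accessors that appear in the snippets above; the partition names and values are made up for illustration, and the details of how nested submaps are merged are assumed rather than shown here.

// Minimal sketch of how StatsSnapshot.aggregate() builds the raw expectation: values are summed into the base.
Map<String, StatsSnapshot> subMapA = new HashMap<>();
subMapA.put("Partition[1]", new StatsSnapshot(400L, null)); // illustrative replica report from node A
StatsSnapshot nodeA = new StatsSnapshot(400L, subMapA);

Map<String, StatsSnapshot> subMapB = new HashMap<>();
subMapB.put("Partition[1]", new StatsSnapshot(600L, null)); // illustrative replica report from node B
StatsSnapshot nodeB = new StatsSnapshot(600L, subMapB);

StatsSnapshot base = new StatsSnapshot(0L, null);
StatsSnapshot.aggregate(base, nodeA);
StatsSnapshot.aggregate(base, nodeB);
// base.getValue() is now 1000L: the raw aggregation sums every replica's reported value, which is why the test
// aggregates nodeStatsWrapper1 and nodeStatsWrapper2Copy before reducing by partition class.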