Use of org.apache.hadoop.hive.ql.exec.spark.Statistic.SparkStatisticGroup in project hive by apache.
Class SparkTask, method logSparkStatistic.
private void logSparkStatistic(SparkStatistics sparkStatistic) {
  Iterator<SparkStatisticGroup> groupIterator = sparkStatistic.getStatisticGroups();
  while (groupIterator.hasNext()) {
    SparkStatisticGroup group = groupIterator.next();
    LOG.info(group.getGroupName());
    Iterator<SparkStatistic> statisticIterator = group.getStatistics();
    while (statisticIterator.hasNext()) {
      SparkStatistic statistic = statisticIterator.next();
      LOG.info("\t" + statistic.getName() + ": " + statistic.getValue());
    }
  }
}
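Because getStatisticGroups() and getStatistics() return an Iterator rather than an Iterable, the explicit while loops above are needed as written. A minimal sketch of the same traversal in for-each style, using a small adapter of our own (the once helper below is not part of Hive):

// Adapter: expose a one-shot Iterator as an Iterable for a single for-each pass.
static <T> Iterable<T> once(Iterator<T> it) {
  return () -> it;
}

// The same walk as logSparkStatistic, for-each style (sketch only):
for (SparkStatisticGroup group : once(sparkStatistic.getStatisticGroups())) {
  LOG.info(group.getGroupName());
  for (SparkStatistic statistic : once(group.getStatistics())) {
    LOG.info("\t" + statistic.getName() + ": " + statistic.getValue());
  }
}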
Use of org.apache.hadoop.hive.ql.exec.spark.Statistic.SparkStatisticGroup in project hive by apache.
Class SparkTask, method printExcessiveGCWarning.
/**
 * Use the Spark metrics to calculate how much task execution time was spent performing GC
 * operations. If more than a defined threshold of time is spent, print a warning on the
 * console.
 */
private void printExcessiveGCWarning() {
  SparkStatisticGroup sparkStatisticGroup = sparkStatistics.getStatisticGroup(
      SparkStatisticsNames.SPARK_GROUP_NAME);
  if (sparkStatisticGroup != null) {
    long taskDurationTime = SparkMetricsUtils.getSparkStatisticAsLong(sparkStatisticGroup,
        SparkStatisticsNames.TASK_DURATION_TIME);
    long jvmGCTime = SparkMetricsUtils.getSparkStatisticAsLong(sparkStatisticGroup,
        SparkStatisticsNames.JVM_GC_TIME);
    // Threshold percentage to trigger the GC warning
    double threshold = 0.1;
    if (jvmGCTime > taskDurationTime * threshold) {
      long percentGcTime = Math.round((double) jvmGCTime / taskDurationTime * 100);
      String gcWarning = String.format(
          "WARNING: Spark Job[%s] Spent %s%% (%s ms / %s ms) of task time in GC",
          sparkJobID, percentGcTime, jvmGCTime, taskDurationTime);
      console.printInfo(gcWarning);
    }
  }
}
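To make the threshold arithmetic concrete, here is a small standalone example with illustrative sample numbers: 1,500 ms of GC inside a 10,000 ms task is 15%, which exceeds the 10% threshold and triggers the warning.

public class GcThresholdDemo {
  public static void main(String[] args) {
    long taskDurationTime = 10_000L; // illustrative sample, in ms
    long jvmGCTime = 1_500L;         // illustrative sample, in ms
    double threshold = 0.1;          // warn when GC exceeds 10% of task time
    if (jvmGCTime > taskDurationTime * threshold) {
      long percentGcTime = Math.round((double) jvmGCTime / taskDurationTime * 100);
      // Prints: WARNING: Spark Job[1] Spent 15% (1500 ms / 10000 ms) of task time in GC
      System.out.println(String.format(
          "WARNING: Spark Job[%s] Spent %s%% (%s ms / %s ms) of task time in GC",
          1, percentGcTime, jvmGCTime, taskDurationTime));
    }
  }
}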
Use of org.apache.hadoop.hive.ql.exec.spark.Statistic.SparkStatisticGroup in project hive by apache.
Class SparkTask, method sparkStatisticsToString.
@VisibleForTesting
static String sparkStatisticsToString(SparkStatistics sparkStatistic, int sparkJobID) {
  StringBuilder sparkStatsString = new StringBuilder();
  sparkStatsString.append("\n\n");
  sparkStatsString.append(String.format("=====Spark Job[%d] Statistics=====", sparkJobID));
  sparkStatsString.append("\n\n");
  Iterator<SparkStatisticGroup> groupIterator = sparkStatistic.getStatisticGroups();
  while (groupIterator.hasNext()) {
    SparkStatisticGroup group = groupIterator.next();
    sparkStatsString.append(group.getGroupName()).append("\n");
    Iterator<SparkStatistic> statisticIterator = group.getStatistics();
    while (statisticIterator.hasNext()) {
      SparkStatistic statistic = statisticIterator.next();
      sparkStatsString.append("\t").append(statistic.getName()).append(": ")
          .append(statistic.getValue()).append("\n");
    }
  }
  return sparkStatsString.toString();
}
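Since the method is annotated @VisibleForTesting, a unit test can assert on the rendered text directly. The sketch below only pins down the output format shown above; buildStats is a hypothetical stand-in for however the test constructs a SparkStatistics with one group and one statistic, since that construction depends on Hive's builder API:

import static org.junit.Assert.assertTrue;

import org.junit.Test;

public class TestSparkStatisticsToString {
  @Test
  public void rendersHeaderGroupAndStatistic() {
    // buildStats(...) is hypothetical: one group "SPARK" holding one statistic.
    SparkStatistics stats = buildStats("SPARK", "TaskDurationTime", "1024");
    String rendered = SparkTask.sparkStatisticsToString(stats, 0);
    // The header line carries the job id.
    assertTrue(rendered.contains("=====Spark Job[0] Statistics====="));
    // Group name on its own line, then a tab-indented "name: value" line.
    assertTrue(rendered.contains("SPARK\n"));
    assertTrue(rendered.contains("\tTaskDurationTime: 1024\n"));
  }
}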
Use of org.apache.hadoop.hive.ql.exec.spark.Statistic.SparkStatisticGroup in project hive by apache.
Class SparkTask, method printConsoleMetrics.
private void printConsoleMetrics() {
  SparkStatisticGroup sparkStatisticGroup = sparkStatistics.getStatisticGroup(
      SparkStatisticsNames.SPARK_GROUP_NAME);
  if (sparkStatisticGroup != null) {
    String colon = ": ";
    String forwardSlash = " / ";
    String separator = ", ";
    String metricsString = String.format("Spark Job[%d] Metrics: ", sparkJobID);
    // Task Duration Time
    if (sparkStatisticGroup.containsSparkStatistic(SparkStatisticsNames.TASK_DURATION_TIME)) {
      metricsString += SparkStatisticsNames.TASK_DURATION_TIME + colon
          + SparkMetricsUtils.getSparkStatisticAsLong(sparkStatisticGroup,
              SparkStatisticsNames.TASK_DURATION_TIME) + separator;
    }
    // Executor CPU Time
    if (sparkStatisticGroup.containsSparkStatistic(SparkStatisticsNames.EXECUTOR_CPU_TIME)) {
      metricsString += SparkStatisticsNames.EXECUTOR_CPU_TIME + colon
          + SparkMetricsUtils.getSparkStatisticAsLong(sparkStatisticGroup,
              SparkStatisticsNames.EXECUTOR_CPU_TIME) + separator;
    }
    // JVM GC Time
    if (sparkStatisticGroup.containsSparkStatistic(SparkStatisticsNames.JVM_GC_TIME)) {
      metricsString += SparkStatisticsNames.JVM_GC_TIME + colon
          + SparkMetricsUtils.getSparkStatisticAsLong(sparkStatisticGroup,
              SparkStatisticsNames.JVM_GC_TIME) + separator;
    }
    // Bytes Read / Records Read
    if (sparkStatisticGroup.containsSparkStatistic(SparkStatisticsNames.BYTES_READ)
        && sparkStatisticGroup.containsSparkStatistic(SparkStatisticsNames.RECORDS_READ)) {
      metricsString += SparkStatisticsNames.BYTES_READ + forwardSlash
          + SparkStatisticsNames.RECORDS_READ + colon
          + SparkMetricsUtils.getSparkStatisticAsLong(sparkStatisticGroup,
              SparkStatisticsNames.BYTES_READ) + forwardSlash
          + SparkMetricsUtils.getSparkStatisticAsLong(sparkStatisticGroup,
              SparkStatisticsNames.RECORDS_READ) + separator;
    }
    // Shuffle Read Bytes / Shuffle Read Records
    if (sparkStatisticGroup.containsSparkStatistic(SparkStatisticsNames.SHUFFLE_TOTAL_BYTES_READ)
        && sparkStatisticGroup.containsSparkStatistic(SparkStatisticsNames.SHUFFLE_RECORDS_READ)) {
      metricsString += SparkStatisticsNames.SHUFFLE_TOTAL_BYTES_READ + forwardSlash
          + SparkStatisticsNames.SHUFFLE_RECORDS_READ + colon
          + SparkMetricsUtils.getSparkStatisticAsLong(sparkStatisticGroup,
              SparkStatisticsNames.SHUFFLE_TOTAL_BYTES_READ) + forwardSlash
          + SparkMetricsUtils.getSparkStatisticAsLong(sparkStatisticGroup,
              SparkStatisticsNames.SHUFFLE_RECORDS_READ) + separator;
    }
    // Shuffle Write Bytes / Shuffle Write Records
    if (sparkStatisticGroup.containsSparkStatistic(SparkStatisticsNames.SHUFFLE_BYTES_WRITTEN)
        && sparkStatisticGroup.containsSparkStatistic(SparkStatisticsNames.SHUFFLE_RECORDS_WRITTEN)) {
      metricsString += SparkStatisticsNames.SHUFFLE_BYTES_WRITTEN + forwardSlash
          + SparkStatisticsNames.SHUFFLE_RECORDS_WRITTEN + colon
          + SparkMetricsUtils.getSparkStatisticAsLong(sparkStatisticGroup,
              SparkStatisticsNames.SHUFFLE_BYTES_WRITTEN) + forwardSlash
          + SparkMetricsUtils.getSparkStatisticAsLong(sparkStatisticGroup,
              SparkStatisticsNames.SHUFFLE_RECORDS_WRITTEN);
    }
    console.printInfo(metricsString);
  }
}
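Every branch above funnels through SparkMetricsUtils.getSparkStatisticAsLong. As an illustrative reconstruction only (not necessarily the actual Hive implementation), such a helper plausibly resolves the named statistic and parses its string value; this assumes SparkStatisticGroup exposes a getSparkStatistic(String) accessor alongside the containsSparkStatistic(String) check used above:

// Sketch only: resolve a named statistic within a group and parse its
// string value as a long. getSparkStatistic(String) is an assumption here.
static long getSparkStatisticAsLong(SparkStatisticGroup group, String name) {
  return Long.parseLong(group.getSparkStatistic(name).getValue());
}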