
Example 1 with SparkStatisticsBuilder

Use of org.apache.hadoop.hive.ql.exec.spark.Statistic.SparkStatisticsBuilder in project hive by apache.

From the class LocalSparkJobStatus, method getSparkStatistics:

@Override
public SparkStatistics getSparkStatistics() {
    SparkStatisticsBuilder sparkStatisticsBuilder = new SparkStatisticsBuilder();
    // add Hive operator level statistics.
    sparkStatisticsBuilder.add(sparkCounters);
    // add Spark job metrics gathered by the job metrics listener.
    Map<Integer, List<Map.Entry<TaskMetrics, TaskInfo>>> jobMetric = jobMetricsListener.getJobMetric(jobId);
    if (jobMetric == null) {
        return null;
    }
    // fold every task's metrics into one MetricsCollection so they can be
    // aggregated across the whole job below.
    MetricsCollection metricsCollection = new MetricsCollection();
    Set<Integer> stageIds = jobMetric.keySet();
    for (int stageId : stageIds) {
        List<Map.Entry<TaskMetrics, TaskInfo>> taskMetrics = jobMetric.get(stageId);
        for (Map.Entry<TaskMetrics, TaskInfo> taskMetric : taskMetrics) {
            Metrics metrics = new Metrics(taskMetric.getKey(), taskMetric.getValue());
            metricsCollection.addMetrics(jobId, stageId, 0, metrics);
        }
    }
    Map<String, Long> flatJobMetric = SparkMetricsUtils.collectMetrics(metricsCollection.getAllMetrics());
    for (Map.Entry<String, Long> entry : flatJobMetric.entrySet()) {
        sparkStatisticsBuilder.add(SparkStatisticsNames.SPARK_GROUP_NAME, entry.getKey(), Long.toString(entry.getValue()));
    }
    return sparkStatisticsBuilder.build();
}
Also used: MetricsCollection (org.apache.hive.spark.client.MetricsCollection), TaskInfo (org.apache.spark.scheduler.TaskInfo), TaskMetrics (org.apache.spark.executor.TaskMetrics), Metrics (org.apache.hive.spark.client.metrics.Metrics), List (java.util.List), SparkStatisticsBuilder (org.apache.hadoop.hive.ql.exec.spark.Statistic.SparkStatisticsBuilder), HashMap (java.util.HashMap), Map (java.util.Map)
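
For context, downstream code such as Hive's SparkTask consumes the SparkStatistics returned here by walking its statistic groups. Below is a minimal sketch of such a consumer, assuming iterator-style accessors getStatisticGroups(), getGroupName(), getStatistics(), getName() and getValue() on the Statistic classes; verify these names against the Hive version in use.

import java.util.Iterator;
import org.apache.hadoop.hive.ql.exec.spark.Statistic.SparkStatistic;
import org.apache.hadoop.hive.ql.exec.spark.Statistic.SparkStatisticGroup;
import org.apache.hadoop.hive.ql.exec.spark.Statistic.SparkStatistics;

public class SparkStatisticsPrinter {

    // Walks every statistic group (e.g. the SPARK group populated above)
    // and prints each statistic as "group.name=value".
    public static void print(SparkStatistics statistics) {
        if (statistics == null) {
            // getSparkStatistics() returns null when no job metrics were collected.
            return;
        }
        Iterator<SparkStatisticGroup> groups = statistics.getStatisticGroups();
        while (groups.hasNext()) {
            SparkStatisticGroup group = groups.next();
            Iterator<SparkStatistic> stats = group.getStatistics();
            while (stats.hasNext()) {
                SparkStatistic stat = stats.next();
                System.out.println(group.getGroupName() + "." + stat.getName() + "=" + stat.getValue());
            }
        }
    }
}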

Example 2 with SparkStatisticsBuilder

Use of org.apache.hadoop.hive.ql.exec.spark.Statistic.SparkStatisticsBuilder in project hive by apache.

From the class RemoteSparkJobStatus, method getSparkStatistics:

@Override
public SparkStatistics getSparkStatistics() {
    MetricsCollection metricsCollection = jobHandle.getMetrics();
    if (metricsCollection == null || getCounter() == null) {
        return null;
    }
    SparkStatisticsBuilder sparkStatisticsBuilder = new SparkStatisticsBuilder();
    // add Hive operator level statistics, e.g. RECORDS_IN, RECORDS_OUT.
    sparkStatisticsBuilder.add(getCounter());
    // add Spark job metrics collected by Spark itself, e.g. JvmGCTime,
    // ExecutorRunTime, etc.
    Map<String, Long> flatJobMetric = SparkMetricsUtils.collectMetrics(metricsCollection.getAllMetrics());
    for (Map.Entry<String, Long> entry : flatJobMetric.entrySet()) {
        sparkStatisticsBuilder.add(SparkStatisticsNames.SPARK_GROUP_NAME, entry.getKey(), Long.toString(entry.getValue()));
    }
    return sparkStatisticsBuilder.build();
}
Also used: MetricsCollection (org.apache.hive.spark.client.MetricsCollection), SparkStatisticsBuilder (org.apache.hadoop.hive.ql.exec.spark.Statistic.SparkStatisticsBuilder), HashMap (java.util.HashMap), Map (java.util.Map)
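
Both examples follow the same pattern: feed operator-level counters and the flattened job metrics into a SparkStatisticsBuilder, then call build(). A self-contained sketch of that pattern follows; the metric names and values are illustrative only, and only the add(String, String, String) overload, SparkStatisticsNames.SPARK_GROUP_NAME, and build() already seen in the examples above are relied upon.

import org.apache.hadoop.hive.ql.exec.spark.Statistic.SparkStatistics;
import org.apache.hadoop.hive.ql.exec.spark.Statistic.SparkStatisticsBuilder;
import org.apache.hadoop.hive.ql.exec.spark.Statistic.SparkStatisticsNames;

public class SparkStatisticsDemo {

    public static SparkStatistics buildSampleStatistics() {
        SparkStatisticsBuilder builder = new SparkStatisticsBuilder();
        // Illustrative values only; real callers pass the name/value pairs produced
        // by SparkMetricsUtils.collectMetrics(...), as in both examples above.
        builder.add(SparkStatisticsNames.SPARK_GROUP_NAME, "ExecutorRunTime", Long.toString(1234L));
        builder.add(SparkStatisticsNames.SPARK_GROUP_NAME, "JvmGCTime", Long.toString(56L));
        return builder.build();
    }
}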

Aggregations

HashMap (java.util.HashMap): 2 usages
Map (java.util.Map): 2 usages
SparkStatisticsBuilder (org.apache.hadoop.hive.ql.exec.spark.Statistic.SparkStatisticsBuilder): 2 usages
MetricsCollection (org.apache.hive.spark.client.MetricsCollection): 2 usages
List (java.util.List): 1 usage
Metrics (org.apache.hive.spark.client.metrics.Metrics): 1 usage
TaskMetrics (org.apache.spark.executor.TaskMetrics): 1 usage
TaskInfo (org.apache.spark.scheduler.TaskInfo): 1 usage