Use of org.apache.hadoop.hive.ql.exec.spark.Statistic.SparkStatistic in project hive by apache.
The class SparkTask, method logSparkStatistic.
private void logSparkStatistic(SparkStatistics sparkStatistic) {
  // Walk every statistic group: log the group name, then each
  // name/value pair on its own tab-indented line.
  Iterator<SparkStatisticGroup> groupIterator = sparkStatistic.getStatisticGroups();
  while (groupIterator.hasNext()) {
    SparkStatisticGroup group = groupIterator.next();
    LOG.info(group.getGroupName());
    Iterator<SparkStatistic> statisticIterator = group.getStatistics();
    while (statisticIterator.hasNext()) {
      SparkStatistic statistic = statisticIterator.next();
      LOG.info("\t" + statistic.getName() + ": " + statistic.getValue());
    }
  }
}
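For callers that want the values programmatically rather than in the log, the same group/statistic traversal can flatten everything into a single map. A minimal sketch using only the accessors shown above (getStatisticGroups, getStatistics, getGroupName, getName, getValue); the helper name statisticsToMap is not part of Hive, and java.util.Iterator, Map, and LinkedHashMap imports are assumed:

// Hypothetical helper: flatten a SparkStatistics tree into one map,
// keyed as "<groupName>.<statName>". Insertion order is preserved so
// the map reads in the same order the groups are iterated.
static Map<String, String> statisticsToMap(SparkStatistics sparkStatistics) {
  Map<String, String> result = new LinkedHashMap<>();
  Iterator<SparkStatisticGroup> groups = sparkStatistics.getStatisticGroups();
  while (groups.hasNext()) {
    SparkStatisticGroup group = groups.next();
    Iterator<SparkStatistic> stats = group.getStatistics();
    while (stats.hasNext()) {
      SparkStatistic stat = stats.next();
      result.put(group.getGroupName() + "." + stat.getName(), stat.getValue());
    }
  }
  return result;
}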
Use of org.apache.hadoop.hive.ql.exec.spark.Statistic.SparkStatistic in project hive by apache.
The class TestSparkStatistics, method testSparkStatistics.
@Test
public void testSparkStatistics() throws MalformedURLException, CommandProcessorException {
  String confDir = "../../data/conf/spark/standalone/hive-site.xml";
  HiveConf.setHiveSiteLocation(new File(confDir).toURI().toURL());
  HiveConf conf = new HiveConf();
  conf.set("spark.local.dir", Paths.get(System.getProperty("test.tmp.dir"),
      "TestSparkStatistics-local-dir").toString());
  SessionState.start(conf);
  Driver driver = null;
  try {
    driver = new Driver(new QueryState.Builder()
        .withGenerateNewQueryId(true)
        .withHiveConf(conf).build(), null);
    driver.run("create table test (col int)");
    // Compile a query that requires a Spark job, then pull the single
    // SparkTask out of the compiled plan.
    Assert.assertEquals(0, driver.compile("select * from test order by col", true));
    List<SparkTask> sparkTasks = Utilities.getSparkTasks(driver.getPlan().getRootTasks());
    Assert.assertEquals(1, sparkTasks.size());
    SparkTask sparkTask = sparkTasks.get(0);
    TaskQueue taskQueue = new TaskQueue(driver.getContext());
    taskQueue.prepare(driver.getPlan());
    sparkTask.initialize(driver.getQueryState(), driver.getPlan(), taskQueue, driver.getContext());
    // Execute the task directly and verify it recorded statistics.
    Assert.assertEquals(0, sparkTask.execute());
    Assert.assertNotNull(sparkTask.getSparkStatistics());
    List<SparkStatistic> sparkStats = Lists.newArrayList(sparkTask.getSparkStatistics()
        .getStatisticGroup(SparkStatisticsNames.SPARK_GROUP_NAME).getStatistics());
    Assert.assertEquals(26, sparkStats.size());
    Map<String, String> statsMap = sparkStats.stream()
        .collect(Collectors.toMap(SparkStatistic::getName, SparkStatistic::getValue));
    // Timing metrics should all be positive for a job that actually ran.
    Assert.assertTrue(Long.parseLong(statsMap.get(SparkStatisticsNames.TASK_DURATION_TIME)) > 0);
    Assert.assertTrue(Long.parseLong(statsMap.get(SparkStatisticsNames.EXECUTOR_CPU_TIME)) > 0);
    Assert.assertTrue(Long.parseLong(statsMap.get(SparkStatisticsNames.EXECUTOR_DESERIALIZE_CPU_TIME)) > 0);
    Assert.assertTrue(Long.parseLong(statsMap.get(SparkStatisticsNames.EXECUTOR_DESERIALIZE_TIME)) > 0);
    Assert.assertTrue(Long.parseLong(statsMap.get(SparkStatisticsNames.EXECUTOR_RUN_TIME)) > 0);
  } finally {
    if (driver != null) {
      driver.run("drop table if exists test");
      driver.destroy();
    }
  }
}
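The test demonstrates the general pattern for running a single Hive task outside the full Driver.run() pipeline: compile, extract the task from the plan, wire it to a TaskQueue, then initialize and execute it. A condensed sketch of that pattern, assuming a Driver named driver has already compiled a query whose plan contains exactly one SparkTask, and using only calls that appear in the test above:

// Condensed form of the execute-one-task pattern from the test.
SparkTask task = Utilities.getSparkTasks(driver.getPlan().getRootTasks()).get(0);
TaskQueue queue = new TaskQueue(driver.getContext());
queue.prepare(driver.getPlan());
task.initialize(driver.getQueryState(), driver.getPlan(), queue, driver.getContext());
int rc = task.execute();                            // 0 indicates success
SparkStatistics stats = task.getSparkStatistics();  // populated after execute()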
Use of org.apache.hadoop.hive.ql.exec.spark.Statistic.SparkStatistic in project hive by apache.
The class SparkTask, method sparkStatisticsToString.
@VisibleForTesting
static String sparkStatisticsToString(SparkStatistics sparkStatistic, int sparkJobID) {
  StringBuilder sparkStatsString = new StringBuilder();
  sparkStatsString.append("\n\n");
  sparkStatsString.append(String.format("=====Spark Job[%d] Statistics=====", sparkJobID));
  sparkStatsString.append("\n\n");
  // Same group/statistic traversal as logSparkStatistic, but rendered
  // into one string instead of being written to the log line by line.
  Iterator<SparkStatisticGroup> groupIterator = sparkStatistic.getStatisticGroups();
  while (groupIterator.hasNext()) {
    SparkStatisticGroup group = groupIterator.next();
    sparkStatsString.append(group.getGroupName()).append("\n");
    Iterator<SparkStatistic> statisticIterator = group.getStatistics();
    while (statisticIterator.hasNext()) {
      SparkStatistic statistic = statisticIterator.next();
      sparkStatsString.append("\t").append(statistic.getName()).append(": ")
          .append(statistic.getValue()).append("\n");
    }
  }
  return sparkStatsString.toString();
}
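Because the method is static, package-private, and marked @VisibleForTesting, a call site inside SparkTask would render the whole statistics block and log it in a single call. A minimal usage sketch; the variable names sparkStatistics and sparkJobID are placeholders, not names from the Hive source:

// Hypothetical call site: render the full statistics block once and
// write it to the task log as a single multi-line message.
LOG.info(sparkStatisticsToString(sparkStatistics, sparkJobID));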