Use of org.apache.hive.spark.counter.SparkCounter in project hive by apache.
The class SparkTask, method execute.
@Override
public int execute() {
  int rc = 0;
  perfLogger = SessionState.getPerfLogger();
  SparkSession sparkSession = null;
  SparkSessionManager sparkSessionManager = null;
  try {
    printConfigInfo();
    sparkSessionManager = SparkSessionManagerImpl.getInstance();
    sparkSession = SparkUtilities.getSparkSession(conf, sparkSessionManager);

    SparkWork sparkWork = getWork();
    sparkWork.setRequiredCounterPrefix(getOperatorCounters());

    // Submit the Spark job
    perfLogger.perfLogBegin(CLASS_NAME, PerfLogger.SPARK_SUBMIT_JOB);
    submitTime = perfLogger.getStartTime(PerfLogger.SPARK_SUBMIT_JOB);
    jobRef = sparkSession.submit(taskQueue, context, sparkWork);
    perfLogger.perfLogEnd(CLASS_NAME, PerfLogger.SPARK_SUBMIT_JOB);

    // If the driver context has been shut down (due to query cancellation), kill the Spark job
    if (taskQueue.isShutdown()) {
      LOG.warn("Killing Spark job");
      killJob();
      throw new HiveException(String.format("Spark task %s cancelled for query %s", getId(),
          sparkWork.getQueryId()));
    }

    // Get the job handle id associated with the Spark job
    sparkJobHandleId = jobRef.getJobId();

    // Add the Spark job handle id to the Hive history
    addToHistory(Keys.SPARK_JOB_HANDLE_ID, jobRef.getJobId());

    LOG.debug("Starting Spark job with job handle id " + sparkJobHandleId);

    // Get the application id of the Spark app
    jobID = jobRef.getSparkJobStatus().getAppID();

    // Start monitoring the Spark job; returns when the Spark job has completed / failed, or if
    // a timeout occurs
    rc = jobRef.monitorJob();

    // Get the id of the Spark job that was launched; returns -1 if no Spark job was launched
    sparkJobID = jobRef.getSparkJobStatus().getJobId();

    // Add the Spark job id to the Hive history
    addToHistory(Keys.SPARK_JOB_ID, Integer.toString(sparkJobID));

    // Get the final state of the Spark job and parse its job info
    SparkJobStatus sparkJobStatus = jobRef.getSparkJobStatus();
    getSparkJobInfo(sparkJobStatus);
    setSparkException(sparkJobStatus, rc);

    if (rc == 0) {
      sparkStatistics = sparkJobStatus.getSparkStatistics();
      if (SessionState.get() != null) {
        // Set the number of rows written for insert queries, so it can be printed in the client (Beeline).
        SparkCounters counters = sparkJobStatus.getCounter();
        if (counters != null) {
          SparkCounter counter = counters.getCounter(
              HiveConf.getVar(conf, HiveConf.ConfVars.HIVECOUNTERGROUP),
              FileSinkOperator.TOTAL_TABLE_ROWS_WRITTEN);
          if (counter != null) {
            queryState.setNumModifiedRows(counter.getValue());
          }
        }
      }
      printConsoleMetrics();
      printExcessiveGCWarning();
      if (LOG.isInfoEnabled() && sparkStatistics != null) {
        LOG.info(sparkStatisticsToString(sparkStatistics, sparkJobID));
      }
      LOG.info("Successfully completed Spark job[" + sparkJobID + "] with application ID " + jobID +
          " and task ID " + getId());
    } else if (rc == 2) {
      // Cancel the job if the monitor detected a job submission timeout.
      // TODO: If the timeout is because of lack of resources in the cluster, we should
      // ideally also cancel the app request here. But w/o facilities from Spark or YARN,
      // it's difficult to do it on the Hive side alone. See HIVE-12650.
      LOG.debug("Failed to submit Spark job with job handle id " + sparkJobHandleId);
      LOG.info("Failed to submit Spark job for application id " + (Strings.isNullOrEmpty(jobID) ? "UNKNOWN" : jobID));
      killJob();
    } else if (rc == 4) {
      LOG.info("The Spark job or one stage of it has too many tasks" +
          ". Cancelling Spark job " + sparkJobID + " with application ID " + jobID);
      killJob();
    }

    if (this.jobID == null) {
      this.jobID = sparkJobStatus.getAppID();
    }
    sparkJobStatus.cleanup();
  } catch (Exception e) {
    LOG.error("Failed to execute Spark task \"" + getId() + "\"", e);
    setException(e);
    if (e instanceof HiveException) {
      HiveException he = (HiveException) e;
      rc = he.getCanonicalErrorMsg().getErrorCode();
    } else {
      rc = 1;
    }
  } finally {
    startTime = perfLogger.getEndTime(PerfLogger.SPARK_SUBMIT_TO_RUNNING);
    // If the job never reached the running state, startTime will not have been recorded;
    // in that case, set startTime the same as submitTime.
    if (startTime < submitTime) {
      startTime = submitTime;
    }
    finishTime = perfLogger.getEndTime(PerfLogger.SPARK_RUN_JOB);
    Utilities.clearWork(conf);
    if (sparkSession != null && sparkSessionManager != null) {
      rc = close(rc);
      try {
        sparkSessionManager.returnSession(sparkSession);
      } catch (HiveException ex) {
        LOG.error("Failed to return the session to SessionManager", ex);
      }
    }
  }
  return rc;
}
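To make the counter lookup at the end of a successful run more concrete, the sketch below walks a SparkCounters instance using only the accessors that appear in these snippets (getSparkCounterGroups(), getSparkCounters(), getCounter(group, name), getDisplayName(), getValue()). It is an illustrative sketch, not part of SparkTask: the class name SparkCounterDump and both helper methods are hypothetical.

import java.util.Map;

import org.apache.hive.spark.counter.SparkCounter;
import org.apache.hive.spark.counter.SparkCounterGroup;
import org.apache.hive.spark.counter.SparkCounters;

// Illustrative sketch (hypothetical class): inspects an existing SparkCounters
// instance the same way SparkTask.execute() does for TOTAL_TABLE_ROWS_WRITTEN.
public class SparkCounterDump {

  // Prints every counter, grouped by its group display name.
  public static void dumpCounters(SparkCounters counters) {
    for (SparkCounterGroup group : counters.getSparkCounterGroups().values()) {
      System.out.println("Group: " + group.getGroupDisplayName());
      for (SparkCounter counter : group.getSparkCounters().values()) {
        System.out.println("  " + counter.getDisplayName() + " = " + counter.getValue());
      }
    }
  }

  // Looks up a single named counter and falls back to a default when it is
  // absent, mirroring the null check around getCounter() in execute().
  public static long counterValueOrDefault(SparkCounters counters, String groupName,
      String counterName, long defaultValue) {
    SparkCounter counter = counters.getCounter(groupName, counterName);
    return counter == null ? defaultValue : counter.getValue();
  }
}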
Use of org.apache.hive.spark.counter.SparkCounter in project hive by apache.
The class SparkStatisticsBuilder, method add.
public SparkStatisticsBuilder add(SparkCounters sparkCounters) {
  for (SparkCounterGroup counterGroup : sparkCounters.getSparkCounterGroups().values()) {
    String groupDisplayName = counterGroup.getGroupDisplayName();
    List<SparkStatistic> statisticList = statisticMap.get(groupDisplayName);
    if (statisticList == null) {
      statisticList = new LinkedList<SparkStatistic>();
      statisticMap.put(groupDisplayName, statisticList);
    }
    for (SparkCounter counter : counterGroup.getSparkCounters().values()) {
      String displayName = counter.getDisplayName();
      statisticList.add(new SparkStatistic(displayName, Long.toString(counter.getValue())));
    }
  }
  return this;
}
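For context on how this builder might be driven, the fragment below is a hedged usage sketch. It assumes the builder has a public no-arg constructor and a build() method returning SparkStatistics (only add() is shown above), and the package names and the helper method toStatistics are assumptions for illustration, not code from the Hive repository.

import org.apache.hadoop.hive.ql.exec.spark.Statistic.SparkStatistics;
import org.apache.hadoop.hive.ql.exec.spark.Statistic.SparkStatisticsBuilder;
import org.apache.hive.spark.counter.SparkCounters;

// Hypothetical helper: folds the counters of one or more jobs into a single
// SparkStatistics object. Counters that share a group display name land in the
// same statistic group, because add() appends to the existing list for that name.
public class SparkStatisticsExample {

  public static SparkStatistics toStatistics(SparkCounters... counterSets) {
    SparkStatisticsBuilder builder = new SparkStatisticsBuilder();
    for (SparkCounters counters : counterSets) {
      builder.add(counters); // add() returns the builder, so calls could also be chained
    }
    return builder.build(); // assumption: the builder exposes a build() method
  }
}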