Search in sources:

Example 1 with SparkJobStatus

Use of org.apache.hadoop.hive.ql.exec.spark.status.SparkJobStatus in project hive by apache.

From the class SparkTask, the method execute:

@Override
public int execute(DriverContext driverContext) {
    int rc = 0;
    perfLogger = SessionState.getPerfLogger();
    SparkSession sparkSession = null;
    SparkSessionManager sparkSessionManager = null;
    try {
        printConfigInfo();
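        // Obtain a Spark session from the pooled session manager (an
        // existing compatible session may be reused rather than created).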
        sparkSessionManager = SparkSessionManagerImpl.getInstance();
        sparkSession = SparkUtilities.getSparkSession(conf, sparkSessionManager);
        SparkWork sparkWork = getWork();
        sparkWork.setRequiredCounterPrefix(getOperatorCounters());
        perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.SPARK_SUBMIT_JOB);
        submitTime = perfLogger.getStartTime(PerfLogger.SPARK_SUBMIT_JOB);
        SparkJobRef jobRef = sparkSession.submit(driverContext, sparkWork);
        perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.SPARK_SUBMIT_JOB);
        addToHistory(jobRef);
        sparkJobID = jobRef.getJobId();
        this.jobID = jobRef.getSparkJobStatus().getAppID();
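        // Monitor the submitted job to completion; the return code selects
        // the success / timeout handling below.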
        rc = jobRef.monitorJob();
        SparkJobStatus sparkJobStatus = jobRef.getSparkJobStatus();
        getSparkJobInfo(sparkJobStatus, rc);
        if (rc == 0) {
            sparkStatistics = sparkJobStatus.getSparkStatistics();
            if (LOG.isInfoEnabled() && sparkStatistics != null) {
                LOG.info(String.format("=====Spark Job[%s] statistics=====", jobRef.getJobId()));
                logSparkStatistic(sparkStatistics);
            }
            LOG.info("Execution completed successfully");
        } else if (rc == 2) {
            // Cancel job if the monitor found job submission timeout.
            // TODO: If the timeout is because of lack of resources in the cluster, we should
            // ideally also cancel the app request here. But w/o facilities from Spark or YARN,
            // it's difficult to do it on hive side alone. See HIVE-12650.
            jobRef.cancelJob();
        }
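        // Fall back to the app ID from the final job status if it was not
        // available at submission time.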
        if (this.jobID == null) {
            this.jobID = sparkJobStatus.getAppID();
        }
        sparkJobStatus.cleanup();
    } catch (Exception e) {
        String msg = "Failed to execute spark task, with exception '" + Utilities.getNameMessage(e) + "'";
        // Has to use full name to make sure it does not conflict with
        // org.apache.commons.lang.StringUtils
        console.printError(msg, "\n" + org.apache.hadoop.util.StringUtils.stringifyException(e));
        LOG.error(msg, e);
        setException(e);
        rc = 1;
    } finally {
        startTime = perfLogger.getEndTime(PerfLogger.SPARK_SUBMIT_TO_RUNNING);
        // The start time may be unset if the job finished before the monitor
        // could poll it; in that case, set startTime the same as submitTime.
        if (startTime < submitTime) {
            startTime = submitTime;
        }
        finishTime = perfLogger.getEndTime(PerfLogger.SPARK_RUN_JOB);
        Utilities.clearWork(conf);
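        // If a session was acquired, close the task and return the session
        // to the manager; a failed return is logged but does not alter rc.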
        if (sparkSession != null && sparkSessionManager != null) {
            rc = close(rc);
            try {
                sparkSessionManager.returnSession(sparkSession);
            } catch (HiveException ex) {
                LOG.error("Failed to return the session to SessionManager", ex);
            }
        }
    }
    return rc;
}
Also used: SparkSession (org.apache.hadoop.hive.ql.exec.spark.session.SparkSession), HiveException (org.apache.hadoop.hive.ql.metadata.HiveException), SparkJobRef (org.apache.hadoop.hive.ql.exec.spark.status.SparkJobRef), SparkJobStatus (org.apache.hadoop.hive.ql.exec.spark.status.SparkJobStatus), SparkWork (org.apache.hadoop.hive.ql.plan.SparkWork), SparkSessionManager (org.apache.hadoop.hive.ql.exec.spark.session.SparkSessionManager), IOException (java.io.IOException)
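
For orientation, here is a minimal sketch of the SparkJobStatus surface that execute() actually touches. It is inferred only from the call sites above, not taken from the Hive sources: the real interface declares further members, and SparkStatistics below is an empty stand-in for the actual Hive type.

// Hypothetical sketch, not the real Hive interface: only the members
// exercised by SparkTask.execute() above.
interface SparkJobStatusSketch {

    // Application ID of the backing Spark application; used to fill in
    // jobID when it was not available at submission time.
    String getAppID();

    // Aggregated job statistics, logged when the job succeeds (rc == 0).
    SparkStatistics getSparkStatistics();

    // Releases any resources held for status tracking.
    void cleanup();

    // Empty stand-in for Hive's real SparkStatistics type, kept nested so
    // this sketch compiles on its own.
    interface SparkStatistics {
    }
}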
