Use of org.apache.hadoop.hive.ql.exec.spark.session.SparkSessionManager in project hive by apache.
In class SetSparkReducerParallelism, method getSparkMemoryAndCores:
private void getSparkMemoryAndCores(OptimizeSparkProcContext context) throws SemanticException {
  if (sparkMemoryAndCores != null) {
    return;
  }
  if (context.getConf().getBoolean(SPARK_DYNAMIC_ALLOCATION_ENABLED, false)) {
    // If dynamic allocation is enabled, numbers for memory and cores are meaningless. So, we don't
    // try to get it.
    sparkMemoryAndCores = null;
    return;
  }
  SparkSessionManager sparkSessionManager = null;
  SparkSession sparkSession = null;
  try {
    sparkSessionManager = SparkSessionManagerImpl.getInstance();
    sparkSession = SparkUtilities.getSparkSession(context.getConf(), sparkSessionManager);
    sparkMemoryAndCores = sparkSession.getMemoryAndCores();
  } catch (HiveException e) {
    throw new SemanticException("Failed to get a spark session: " + e);
  } catch (Exception e) {
    LOG.warn("Failed to get spark memory/core info", e);
  } finally {
    if (sparkSession != null && sparkSessionManager != null) {
      try {
        sparkSessionManager.returnSession(sparkSession);
      } catch (HiveException ex) {
        LOG.error("Failed to return the session to SessionManager: " + ex, ex);
      }
    }
  }
}
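The snippet above shows the acquire/use/return discipline around the pooled session: fetch the singleton manager, obtain a session through SparkUtilities, read what you need, and always hand the session back in a finally block. The standalone sketch below isolates that pattern under stated assumptions: the class name SparkSessionExample and the System.out/System.err logging are illustrative, the import paths follow the Hive source layout shown in these examples, and the concrete return type of getMemoryAndCores() is deliberately left as Object because it varies across Hive versions.

import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.ql.exec.spark.SparkUtilities;
import org.apache.hadoop.hive.ql.exec.spark.session.SparkSession;
import org.apache.hadoop.hive.ql.exec.spark.session.SparkSessionManager;
import org.apache.hadoop.hive.ql.exec.spark.session.SparkSessionManagerImpl;
import org.apache.hadoop.hive.ql.metadata.HiveException;

public class SparkSessionExample {

  // Acquires a pooled SparkSession, reads the cluster's memory/core figures,
  // and always returns the session to the manager so the pool is not leaked.
  public static void printMemoryAndCores(HiveConf conf) {
    SparkSessionManager manager = null;
    SparkSession session = null;
    try {
      // Process-wide singleton; getSparkSession reuses an open session or starts one.
      manager = SparkSessionManagerImpl.getInstance();
      session = SparkUtilities.getSparkSession(conf, manager);
      // Pair of (memory, cores); exact type depends on the Hive version (assumption).
      Object memoryAndCores = session.getMemoryAndCores();
      System.out.println("Spark memory/cores: " + memoryAndCores);
    } catch (HiveException e) {
      System.err.println("Failed to get a spark session: " + e);
    } finally {
      // Mirror the Hive call sites: return the session even when the lookup failed.
      if (session != null && manager != null) {
        try {
          manager.returnSession(session);
        } catch (HiveException ex) {
          System.err.println("Failed to return the session to SessionManager: " + ex);
        }
      }
    }
  }
}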
Use of org.apache.hadoop.hive.ql.exec.spark.session.SparkSessionManager in project hive by apache.
In class SparkTask, method execute:
@Override
public int execute(DriverContext driverContext) {
  int rc = 0;
  perfLogger = SessionState.getPerfLogger();
  SparkSession sparkSession = null;
  SparkSessionManager sparkSessionManager = null;
  try {
    printConfigInfo();
    sparkSessionManager = SparkSessionManagerImpl.getInstance();
    sparkSession = SparkUtilities.getSparkSession(conf, sparkSessionManager);
    SparkWork sparkWork = getWork();
    sparkWork.setRequiredCounterPrefix(getOperatorCounters());
    perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.SPARK_SUBMIT_JOB);
    submitTime = perfLogger.getStartTime(PerfLogger.SPARK_SUBMIT_JOB);
    SparkJobRef jobRef = sparkSession.submit(driverContext, sparkWork);
    perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.SPARK_SUBMIT_JOB);
    addToHistory(jobRef);
    sparkJobID = jobRef.getJobId();
    this.jobID = jobRef.getSparkJobStatus().getAppID();
    rc = jobRef.monitorJob();
    SparkJobStatus sparkJobStatus = jobRef.getSparkJobStatus();
    getSparkJobInfo(sparkJobStatus, rc);
    if (rc == 0) {
      sparkStatistics = sparkJobStatus.getSparkStatistics();
      if (LOG.isInfoEnabled() && sparkStatistics != null) {
        LOG.info(String.format("=====Spark Job[%s] statistics=====", jobRef.getJobId()));
        logSparkStatistic(sparkStatistics);
      }
      LOG.info("Execution completed successfully");
    } else if (rc == 2) {
      // Cancel job if the monitor found job submission timeout.
      // TODO: If the timeout is because of lack of resources in the cluster, we should
      // ideally also cancel the app request here. But w/o facilities from Spark or YARN,
      // it's difficult to do it on hive side alone. See HIVE-12650.
      jobRef.cancelJob();
    }
    if (this.jobID == null) {
      this.jobID = sparkJobStatus.getAppID();
    }
    sparkJobStatus.cleanup();
  } catch (Exception e) {
    String msg = "Failed to execute spark task, with exception '" + Utilities.getNameMessage(e) + "'";
    // Has to use full name to make sure it does not conflict with
    // org.apache.commons.lang.StringUtils
    console.printError(msg, "\n" + org.apache.hadoop.util.StringUtils.stringifyException(e));
    LOG.error(msg, e);
    setException(e);
    rc = 1;
  } finally {
    startTime = perfLogger.getEndTime(PerfLogger.SPARK_SUBMIT_TO_RUNNING);
    // In this case, set startTime the same as submitTime.
    if (startTime < submitTime) {
      startTime = submitTime;
    }
    finishTime = perfLogger.getEndTime(PerfLogger.SPARK_RUN_JOB);
    Utilities.clearWork(conf);
    if (sparkSession != null && sparkSessionManager != null) {
      rc = close(rc);
      try {
        sparkSessionManager.returnSession(sparkSession);
      } catch (HiveException ex) {
        LOG.error("Failed to return the session to SessionManager", ex);
      }
    }
  }
  return rc;
}
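SparkTask.execute() layers job submission and monitoring on top of the same session handling: submit the SparkWork, block in monitorJob(), and cancel the job when the monitor reports a submission timeout (return code 2), while the finally block returns the session regardless of outcome. The trimmed sketch below shows just that submit/monitor flow, using only calls visible in the snippet above; the class name SparkWorkRunner and the bare return-code handling are assumptions for illustration, and the perf logging, counters, and history bookkeeping of the real task are omitted.

import org.apache.hadoop.hive.ql.DriverContext;
import org.apache.hadoop.hive.ql.exec.spark.session.SparkSession;
import org.apache.hadoop.hive.ql.exec.spark.status.SparkJobRef;
import org.apache.hadoop.hive.ql.exec.spark.status.SparkJobStatus;
import org.apache.hadoop.hive.ql.plan.SparkWork;

public class SparkWorkRunner {

  // Submits a SparkWork on an already-acquired session, waits for the monitor's
  // verdict, and cancels the job if submission timed out (rc == 2, as in SparkTask).
  public static int runSparkWork(SparkSession session, DriverContext driverContext, SparkWork work)
      throws Exception {
    SparkJobRef jobRef = session.submit(driverContext, work); // asynchronous submission
    int rc = jobRef.monitorJob();                             // blocks until success, failure, or timeout
    SparkJobStatus status = jobRef.getSparkJobStatus();
    if (rc == 2) {
      // The monitor saw a job-submission timeout; cancel rather than leave the job queued.
      jobRef.cancelJob();
    }
    status.cleanup();
    return rc; // 0 on success, non-zero otherwise, mirroring SparkTask.execute()
  }
}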