
Example 21 with JobClient

use of org.apache.hadoop.mapred.JobClient in project hive by apache.

the class TempletonControllerJob method run.

/**
   * Enqueue the job and print out the job id for later collection.
   * @see org.apache.hive.hcatalog.templeton.CompleteDelegator
   */
@Override
public int run(String[] args) throws IOException, InterruptedException, ClassNotFoundException, TException {
    if (LOG.isDebugEnabled()) {
        LOG.debug("Preparing to submit job: " + Arrays.toString(args));
    }
    Configuration conf = getConf();
    conf.set(JAR_ARGS_NAME, TempletonUtils.encodeArray(args));
    String memoryMb = appConf.mapperMemoryMb();
    if (memoryMb != null && memoryMb.length() != 0) {
        conf.set(AppConfig.HADOOP_MAP_MEMORY_MB, memoryMb);
    }
    String amMemoryMB = appConf.amMemoryMb();
    if (amMemoryMB != null && !amMemoryMB.isEmpty()) {
        conf.set(AppConfig.HADOOP_MR_AM_MEMORY_MB, amMemoryMB);
    }
    String amJavaOpts = appConf.controllerAMChildOpts();
    if (amJavaOpts != null && !amJavaOpts.isEmpty()) {
        conf.set(AppConfig.HADOOP_MR_AM_JAVA_OPTS, amJavaOpts);
    }
    String user = UserGroupInformation.getCurrentUser().getShortUserName();
    conf.set("user.name", user);
    job = new Job(conf);
    job.setJarByClass(LaunchMapper.class);
    job.setJobName(TempletonControllerJob.class.getSimpleName());
    job.setMapperClass(LaunchMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);
    job.setInputFormatClass(SingleInputFormat.class);
    NullOutputFormat<NullWritable, NullWritable> of = new NullOutputFormat<NullWritable, NullWritable>();
    job.setOutputFormatClass(of.getClass());
    job.setNumReduceTasks(0);
    JobClient jc = new JobClient(new JobConf(job.getConfiguration()));
    if (UserGroupInformation.isSecurityEnabled()) {
        Token<DelegationTokenIdentifier> mrdt = jc.getDelegationToken(new Text("mr token"));
        job.getCredentials().addToken(new Text("mr token"), mrdt);
    }
    String metastoreTokenStrForm = addHMSToken(job, user);
    job.submit();
    JobID submittedJobId = job.getJobID();
    if (metastoreTokenStrForm != null) {
        //so that it can be cancelled later from CompleteDelegator
        DelegationTokenCache.getStringFormTokenCache().storeDelegationToken(submittedJobId.toString(), metastoreTokenStrForm);
        LOG.debug("Added metastore delegation token for jobId=" + submittedJobId.toString() + " user=" + user);
    }
    return 0;
}
Also used : Configuration(org.apache.hadoop.conf.Configuration) DelegationTokenIdentifier(org.apache.hadoop.mapreduce.security.token.delegation.DelegationTokenIdentifier) Text(org.apache.hadoop.io.Text) NullWritable(org.apache.hadoop.io.NullWritable) JobClient(org.apache.hadoop.mapred.JobClient) Job(org.apache.hadoop.mapreduce.Job) JobConf(org.apache.hadoop.mapred.JobConf) NullOutputFormat(org.apache.hadoop.mapreduce.lib.output.NullOutputFormat) JobID(org.apache.hadoop.mapreduce.JobID)
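The detail worth noting in this example is the delegation-token handshake: the job itself is built with the new org.apache.hadoop.mapreduce.Job API, but the old-API JobClient is still the object that hands out MapReduce delegation tokens on a secure cluster. Below is a minimal sketch of just that step, not the Hive implementation; the job name is a placeholder and the "mr token" alias only has to match whatever key is later used to read the token back from the credentials.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.security.token.delegation.DelegationTokenIdentifier;
import org.apache.hadoop.security.UserGroupInformation;
import org.apache.hadoop.security.token.Token;

public class SecureSubmitSketch {
    public static void main(String[] args) throws IOException, InterruptedException {
        Configuration conf = new Configuration();
        // Job.getInstance is the non-deprecated equivalent of new Job(conf) used above.
        Job job = Job.getInstance(conf, "secure-submit-sketch");

        if (UserGroupInformation.isSecurityEnabled()) {
            // The old-API JobClient fetches the MR delegation token; it is then attached
            // to the new-API Job's credentials under an alias of our choosing.
            JobClient jc = new JobClient(new JobConf(job.getConfiguration()));
            Token<DelegationTokenIdentifier> mrToken = jc.getDelegationToken(new Text("mr token"));
            job.getCredentials().addToken(new Text("mr token"), mrToken);
            jc.close();
        }

        // ... configure mapper, input and output formats as in the example above, then:
        // job.submit();
    }
}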

Example 22 with JobClient

use of org.apache.hadoop.mapred.JobClient in project hive by apache.

the class PartialScanTask method execute.

@Override
public /**
   * start a new map-reduce job to do partial scan to calculate Stats,
   * almost the same as BlockMergeTask or ExecDriver.
   */
int execute(DriverContext driverContext) {
    HiveConf.setVar(job, HiveConf.ConfVars.HIVEINPUTFORMAT, CombineHiveInputFormat.class.getName());
    success = true;
    HiveFileFormatUtils.prepareJobOutput(job);
    job.setOutputFormat(HiveOutputFormatImpl.class);
    job.setMapperClass(work.getMapperClass());
    Context ctx = driverContext.getCtx();
    boolean ctxCreated = false;
    try {
        if (ctx == null) {
            ctx = new Context(job);
            ctxCreated = true;
        }
    } catch (IOException e) {
        e.printStackTrace();
        console.printError("Error launching map-reduce job", "\n" + org.apache.hadoop.util.StringUtils.stringifyException(e));
        return 5;
    }
    job.setMapOutputKeyClass(NullWritable.class);
    job.setMapOutputValueClass(NullWritable.class);
    if (work.getNumMapTasks() != null) {
        job.setNumMapTasks(work.getNumMapTasks());
    }
    // zero reducers
    job.setNumReduceTasks(0);
    if (work.getMinSplitSize() != null) {
        HiveConf.setLongVar(job, HiveConf.ConfVars.MAPREDMINSPLITSIZE, work.getMinSplitSize().longValue());
    }
    if (work.getInputformat() != null) {
        HiveConf.setVar(job, HiveConf.ConfVars.HIVEINPUTFORMAT, work.getInputformat());
    }
    String inpFormat = HiveConf.getVar(job, HiveConf.ConfVars.HIVEINPUTFORMAT);
    LOG.info("Using " + inpFormat);
    try {
        job.setInputFormat(JavaUtils.loadClass(inpFormat));
    } catch (ClassNotFoundException e) {
        throw new RuntimeException(e.getMessage(), e);
    }
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(NullWritable.class);
    int returnVal = 0;
    RunningJob rj = null;
    boolean noName = StringUtils.isEmpty(job.get(MRJobConfig.JOB_NAME));
    String jobName = null;
    if (noName && this.getQueryPlan() != null) {
        int maxlen = conf.getIntVar(HiveConf.ConfVars.HIVEJOBNAMELENGTH);
        jobName = Utilities.abbreviate(this.getQueryPlan().getQueryStr(), maxlen - 6);
    }
    if (noName) {
        // This is for a special case to ensure unit tests pass
        job.set(MRJobConfig.JOB_NAME, jobName != null ? jobName : "JOB" + Utilities.randGen.nextInt());
    }
    // pass aggregation key to mapper
    HiveConf.setVar(job, HiveConf.ConfVars.HIVE_STATS_KEY_PREFIX, work.getAggKey());
    job.set(StatsSetupConst.STATS_TMP_LOC, work.getStatsTmpDir());
    try {
        addInputPaths(job, work);
        MapredWork mrWork = new MapredWork();
        mrWork.setMapWork(work);
        Utilities.setMapRedWork(job, mrWork, ctx.getMRTmpPath());
        // remove the pwd from the conf file so that the job tracker doesn't show it in its logs
        String pwd = HiveConf.getVar(job, HiveConf.ConfVars.METASTOREPWD);
        if (pwd != null) {
            HiveConf.setVar(job, HiveConf.ConfVars.METASTOREPWD, "HIVE");
        }
        JobClient jc = new JobClient(job);
        String addedJars = Utilities.getResourceFiles(job, SessionState.ResourceType.JAR);
        if (!addedJars.isEmpty()) {
            job.set("tmpjars", addedJars);
        }
        // make this client wait if the job tracker is not behaving well.
        Throttle.checkJobTracker(job, LOG);
        if (work.isGatheringStats()) {
            // initialize stats publishing table
            StatsPublisher statsPublisher;
            StatsFactory factory = StatsFactory.newFactory(job);
            if (factory != null) {
                statsPublisher = factory.getStatsPublisher();
                StatsCollectionContext sc = new StatsCollectionContext(job);
                sc.setStatsTmpDir(work.getStatsTmpDir());
                if (!statsPublisher.init(sc)) {
                    // creating stats table if not exists
                    if (HiveConf.getBoolVar(job, HiveConf.ConfVars.HIVE_STATS_RELIABLE)) {
                        throw new HiveException(ErrorMsg.STATSPUBLISHER_INITIALIZATION_ERROR.getErrorCodedMsg());
                    }
                }
            }
        }
        // Finally SUBMIT the JOB!
        rj = jc.submitJob(job);
        this.jobID = rj.getJobID();
        returnVal = jobExecHelper.progress(rj, jc, ctx);
        success = (returnVal == 0);
    } catch (Exception e) {
        e.printStackTrace();
        setException(e);
        String mesg = " with exception '" + Utilities.getNameMessage(e) + "'";
        if (rj != null) {
            mesg = "Ended Job = " + rj.getJobID() + mesg;
        } else {
            mesg = "Job Submission failed" + mesg;
        }
        // Has to use full name to make sure it does not conflict with
        // org.apache.commons.lang.StringUtils
        console.printError(mesg, "\n" + org.apache.hadoop.util.StringUtils.stringifyException(e));
        success = false;
        returnVal = 1;
    } finally {
        try {
            if (ctxCreated) {
                ctx.clear();
            }
            if (rj != null) {
                if (returnVal != 0) {
                    rj.killJob();
                }
            }
        } catch (Exception e) {
            LOG.warn("Failed in cleaning up ", e);
        } finally {
            HadoopJobExecHelper.runningJobs.remove(rj);
        }
    }
    return (returnVal);
}
Also used : StatsCollectionContext(org.apache.hadoop.hive.ql.stats.StatsCollectionContext) Context(org.apache.hadoop.hive.ql.Context) DriverContext(org.apache.hadoop.hive.ql.DriverContext) CompilationOpContext(org.apache.hadoop.hive.ql.CompilationOpContext) StatsCollectionContext(org.apache.hadoop.hive.ql.stats.StatsCollectionContext) CombineHiveInputFormat(org.apache.hadoop.hive.ql.io.CombineHiveInputFormat) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) IOException(java.io.IOException) JobClient(org.apache.hadoop.mapred.JobClient) IOException(java.io.IOException) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) StatsPublisher(org.apache.hadoop.hive.ql.stats.StatsPublisher) StatsFactory(org.apache.hadoop.hive.ql.stats.StatsFactory) MapredWork(org.apache.hadoop.hive.ql.plan.MapredWork) RunningJob(org.apache.hadoop.mapred.RunningJob)
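PartialScanTask wraps the bare submission pattern in Hive helpers (Utilities, Throttle, HadoopJobExecHelper). Stripped of those, a map-only old-API job driven through JobClient looks roughly like the sketch below; the pass-through mapper, class names, and the input/output paths taken from args are all placeholders used only to keep the sketch self-contained and runnable.

import java.io.IOException;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.mapred.RunningJob;
import org.apache.hadoop.mapred.TextInputFormat;
import org.apache.hadoop.mapred.TextOutputFormat;

public class MapOnlySketch {

    /** Identity-style mapper, present only so the sketch compiles on its own. */
    public static class PassThroughMapper extends MapReduceBase
            implements Mapper<LongWritable, Text, NullWritable, Text> {
        @Override
        public void map(LongWritable key, Text value,
                OutputCollector<NullWritable, Text> out, Reporter reporter) throws IOException {
            out.collect(NullWritable.get(), value);
        }
    }

    public static void main(String[] args) throws IOException {
        JobConf job = new JobConf(MapOnlySketch.class);
        job.setJobName("map-only-sketch");
        job.setMapperClass(PassThroughMapper.class);
        // zero reducers, as in the partial-scan task above
        job.setNumReduceTasks(0);
        job.setInputFormat(TextInputFormat.class);
        job.setOutputFormat(TextOutputFormat.class);
        job.setOutputKeyClass(NullWritable.class);
        job.setOutputValueClass(Text.class);
        FileInputFormat.setInputPaths(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        JobClient jc = new JobClient(job);
        try {
            // asynchronous submit, same call the Hive tasks use
            RunningJob rj = jc.submitJob(job);
            rj.waitForCompletion();
            System.out.println("success = " + rj.isSuccessful());
        } finally {
            jc.close();
        }
    }
}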

Example 23 with JobClient

use of org.apache.hadoop.mapred.JobClient in project hive by apache.

the class HadoopJobExecHelper method progress.

private MapRedStats progress(ExecDriverTaskHandle th) throws IOException, LockException {
    JobClient jc = th.getJobClient();
    RunningJob rj = th.getRunningJob();
    SimpleDateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss,SSS");
    //DecimalFormat longFormatter = new DecimalFormat("###,###");
    long reportTime = System.currentTimeMillis();
    long maxReportInterval = HiveConf.getTimeVar(job, HiveConf.ConfVars.HIVE_LOG_INCREMENTAL_PLAN_PROGRESS_INTERVAL, TimeUnit.MILLISECONDS);
    boolean fatal = false;
    StringBuilder errMsg = new StringBuilder();
    long pullInterval = HiveConf.getLongVar(job, HiveConf.ConfVars.HIVECOUNTERSPULLINTERVAL);
    boolean initializing = true;
    boolean initOutputPrinted = false;
    long cpuMsec = -1;
    int numMap = -1;
    int numReduce = -1;
    List<ClientStatsPublisher> clientStatPublishers = getClientStatPublishers();
    final boolean localMode = ShimLoader.getHadoopShims().isLocalMode(job);
    while (!rj.isComplete()) {
        if (th.getContext() != null) {
            th.getContext().checkHeartbeaterLockException();
        }
        try {
            Thread.sleep(pullInterval);
        } catch (InterruptedException e) {
        }
        if (initializing && rj.getJobState() == JobStatus.PREP) {
            // No reason to poll until the job is initialized
            continue;
        } else {
            // By now the job is initialized, so there is no reason to call
            // rj.getJobState() again and incur an extra RPC call
            initializing = false;
        }
        if (!localMode) {
            if (!initOutputPrinted) {
                SessionState ss = SessionState.get();
                String logMapper;
                String logReducer;
                TaskReport[] mappers = jc.getMapTaskReports(rj.getID());
                if (mappers == null) {
                    logMapper = "no information for number of mappers; ";
                } else {
                    numMap = mappers.length;
                    if (ss != null) {
                        ss.getHiveHistory().setTaskProperty(queryId, getId(), Keys.TASK_NUM_MAPPERS, Integer.toString(numMap));
                    }
                    logMapper = "number of mappers: " + numMap + "; ";
                }
                TaskReport[] reducers = jc.getReduceTaskReports(rj.getID());
                if (reducers == null) {
                    logReducer = "no information for number of reducers. ";
                } else {
                    numReduce = reducers.length;
                    if (ss != null) {
                        ss.getHiveHistory().setTaskProperty(queryId, getId(), Keys.TASK_NUM_REDUCERS, Integer.toString(numReduce));
                    }
                    logReducer = "number of reducers: " + numReduce;
                }
                console.printInfo("Hadoop job information for " + getId() + ": " + logMapper + logReducer);
                initOutputPrinted = true;
            }
            RunningJob newRj = jc.getJob(rj.getID());
            if (newRj == null) {
                // The job status is no longer available, so raise a meaningful exception
                throw new IOException("Could not find status of job:" + rj.getID());
            } else {
                th.setRunningJob(newRj);
                rj = newRj;
            }
        }
        // let the job retry several times, which eventually leads to failure.
        if (fatal) {
            // wait until rj.isComplete
            continue;
        }
        Counters ctrs = th.getCounters();
        if (fatal = checkFatalErrors(ctrs, errMsg)) {
            console.printError("[Fatal Error] " + errMsg.toString() + ". Killing the job.");
            rj.killJob();
            continue;
        }
        errMsg.setLength(0);
        updateCounters(ctrs, rj);
        // Prepare data for Client Stat Publishers (if any present) and execute them
        if (clientStatPublishers.size() > 0 && ctrs != null) {
            Map<String, Double> extractedCounters = extractAllCounterValues(ctrs);
            for (ClientStatsPublisher clientStatPublisher : clientStatPublishers) {
                try {
                    clientStatPublisher.run(extractedCounters, rj.getID().toString());
                } catch (RuntimeException runtimeException) {
                    LOG.error("Exception " + runtimeException.getClass().getCanonicalName() + " thrown when running clientStatsPublishers. The stack trace is: ", runtimeException);
                }
            }
        }
        if (mapProgress == lastMapProgress && reduceProgress == lastReduceProgress && System.currentTimeMillis() < reportTime + maxReportInterval) {
            continue;
        }
        StringBuilder report = new StringBuilder();
        report.append(dateFormat.format(Calendar.getInstance().getTime()));
        report.append(' ').append(getId());
        report.append(" map = ").append(mapProgress).append("%, ");
        report.append(" reduce = ").append(reduceProgress).append('%');
        // If the CPU time counter is available, append the cumulative CPU time to the report.
        if (ctrs != null) {
            Counter counterCpuMsec = ctrs.findCounter("org.apache.hadoop.mapred.Task$Counter", "CPU_MILLISECONDS");
            if (counterCpuMsec != null) {
                long newCpuMSec = counterCpuMsec.getValue();
                if (newCpuMSec > 0) {
                    cpuMsec = newCpuMSec;
                    report.append(", Cumulative CPU ").append((cpuMsec / 1000D)).append(" sec");
                }
            }
        }
        // write out serialized plan with counters to log file
        // LOG.info(queryPlan);
        String output = report.toString();
        SessionState ss = SessionState.get();
        if (ss != null) {
            ss.getHiveHistory().setTaskCounters(queryId, getId(), ctrs);
            ss.getHiveHistory().setTaskProperty(queryId, getId(), Keys.TASK_HADOOP_PROGRESS, output);
            if (ss.getConf().getBoolVar(HiveConf.ConfVars.HIVE_LOG_INCREMENTAL_PLAN_PROGRESS)) {
                ss.getHiveHistory().progressTask(queryId, this.task);
                this.callBackObj.logPlanProgress(ss);
            }
        }
        console.printInfo(output);
        task.setStatusMessage(output);
        reportTime = System.currentTimeMillis();
    }
    Counters ctrs = th.getCounters();
    if (ctrs != null) {
        Counter counterCpuMsec = ctrs.findCounter("org.apache.hadoop.mapred.Task$Counter", "CPU_MILLISECONDS");
        if (counterCpuMsec != null) {
            long newCpuMSec = counterCpuMsec.getValue();
            if (newCpuMSec > cpuMsec) {
                cpuMsec = newCpuMSec;
            }
        }
    }
    if (cpuMsec > 0) {
        String status = "MapReduce Total cumulative CPU time: " + Utilities.formatMsecToStr(cpuMsec);
        console.printInfo(status);
        task.setStatusMessage(status);
    }
    boolean success;
    if (fatal) {
        success = false;
    } else {
        // the last check before the job is completed
        if (checkFatalErrors(ctrs, errMsg)) {
            console.printError("[Fatal Error] " + errMsg.toString());
            success = false;
        } else {
            SessionState ss = SessionState.get();
            if (ss != null) {
                ss.getHiveHistory().setTaskCounters(queryId, getId(), ctrs);
            }
            success = rj.isSuccessful();
        }
    }
    MapRedStats mapRedStats = new MapRedStats(numMap, numReduce, cpuMsec, success, rj.getID().toString());
    mapRedStats.setCounters(ctrs);
    // update based on the final value of the counters
    updateCounters(ctrs, rj);
    SessionState ss = SessionState.get();
    if (ss != null) {
        this.callBackObj.logPlanProgress(ss);
    }
    // LOG.info(queryPlan);
    return mapRedStats;
}
Also used : SessionState(org.apache.hadoop.hive.ql.session.SessionState) TaskReport(org.apache.hadoop.mapred.TaskReport) IOException(java.io.IOException) JobClient(org.apache.hadoop.mapred.JobClient) Counter(org.apache.hadoop.mapred.Counters.Counter) ClientStatsPublisher(org.apache.hadoop.hive.ql.stats.ClientStatsPublisher) RunningJob(org.apache.hadoop.mapred.RunningJob) Counters(org.apache.hadoop.mapred.Counters) MapRedStats(org.apache.hadoop.hive.ql.MapRedStats) SimpleDateFormat(java.text.SimpleDateFormat)
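The progress loop above interleaves Hive session bookkeeping (HiveHistory, client stats publishers) with the underlying JobClient/RunningJob polling. A stripped-down sketch of just the polling side is shown below; the method name, the poll interval parameter, and the console output are illustrative, not part of the Hive helper.

import java.io.IOException;

import org.apache.hadoop.mapred.Counters;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.RunningJob;
import org.apache.hadoop.mapred.TaskReport;

public class ProgressPollSketch {

    /** Poll a submitted job until completion, printing coarse progress. */
    public static boolean monitor(JobClient jc, RunningJob rj, long pullIntervalMs)
            throws IOException, InterruptedException {
        TaskReport[] mappers = jc.getMapTaskReports(rj.getID());
        TaskReport[] reducers = jc.getReduceTaskReports(rj.getID());
        System.out.println("mappers=" + (mappers == null ? "?" : mappers.length)
                + " reducers=" + (reducers == null ? "?" : reducers.length));

        while (!rj.isComplete()) {
            Thread.sleep(pullIntervalMs);
            // Re-fetch the handle so progress and counters reflect the latest cluster state.
            RunningJob refreshed = jc.getJob(rj.getID());
            if (refreshed == null) {
                throw new IOException("Could not find status of job: " + rj.getID());
            }
            rj = refreshed;
            System.out.printf("map = %.0f%%, reduce = %.0f%%%n",
                    rj.mapProgress() * 100, rj.reduceProgress() * 100);
        }

        Counters ctrs = rj.getCounters();
        if (ctrs != null) {
            // Same counter the Hive helper reads for cumulative CPU time.
            Counters.Counter cpu =
                    ctrs.findCounter("org.apache.hadoop.mapred.Task$Counter", "CPU_MILLISECONDS");
            if (cpu != null) {
                System.out.println("Cumulative CPU " + (cpu.getValue() / 1000D) + " sec");
            }
        }
        return rj.isSuccessful();
    }
}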

Example 24 with JobClient

use of org.apache.hadoop.mapred.JobClient in project hive by apache.

the class ExecDriver method execute.

/**
   * Execute a query plan using Hadoop.
   */
@SuppressWarnings({ "deprecation", "unchecked" })
@Override
public int execute(DriverContext driverContext) {
    IOPrepareCache ioPrepareCache = IOPrepareCache.get();
    ioPrepareCache.clear();
    boolean success = true;
    Context ctx = driverContext.getCtx();
    boolean ctxCreated = false;
    Path emptyScratchDir;
    JobClient jc = null;
    if (driverContext.isShutdown()) {
        LOG.warn("Task was cancelled");
        return 5;
    }
    MapWork mWork = work.getMapWork();
    ReduceWork rWork = work.getReduceWork();
    try {
        if (ctx == null) {
            ctx = new Context(job);
            ctxCreated = true;
        }
        emptyScratchDir = ctx.getMRTmpPath();
        FileSystem fs = emptyScratchDir.getFileSystem(job);
        fs.mkdirs(emptyScratchDir);
    } catch (IOException e) {
        e.printStackTrace();
        console.printError("Error launching map-reduce job", "\n" + org.apache.hadoop.util.StringUtils.stringifyException(e));
        return 5;
    }
    HiveFileFormatUtils.prepareJobOutput(job);
    //See the javadoc on HiveOutputFormatImpl and HadoopShims.prepareJobOutput()
    job.setOutputFormat(HiveOutputFormatImpl.class);
    job.setMapperClass(ExecMapper.class);
    job.setMapOutputKeyClass(HiveKey.class);
    job.setMapOutputValueClass(BytesWritable.class);
    try {
        String partitioner = HiveConf.getVar(job, ConfVars.HIVEPARTITIONER);
        job.setPartitionerClass(JavaUtils.loadClass(partitioner));
    } catch (ClassNotFoundException e) {
        throw new RuntimeException(e.getMessage(), e);
    }
    propagateSplitSettings(job, mWork);
    job.setNumReduceTasks(rWork != null ? rWork.getNumReduceTasks().intValue() : 0);
    job.setReducerClass(ExecReducer.class);
    // set input format information if necessary
    setInputAttributes(job);
    // Turn on speculative execution for reducers
    boolean useSpeculativeExecReducers = HiveConf.getBoolVar(job, HiveConf.ConfVars.HIVESPECULATIVEEXECREDUCERS);
    job.setBoolean(MRJobConfig.REDUCE_SPECULATIVE, useSpeculativeExecReducers);
    String inpFormat = HiveConf.getVar(job, HiveConf.ConfVars.HIVEINPUTFORMAT);
    if (mWork.isUseBucketizedHiveInputFormat()) {
        inpFormat = BucketizedHiveInputFormat.class.getName();
    }
    LOG.info("Using " + inpFormat);
    try {
        job.setInputFormat(JavaUtils.loadClass(inpFormat));
    } catch (ClassNotFoundException e) {
        throw new RuntimeException(e.getMessage(), e);
    }
    // No-op - we don't really write anything here.
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    int returnVal = 0;
    boolean noName = StringUtils.isEmpty(job.get(MRJobConfig.JOB_NAME));
    if (noName) {
        // This is for a special case to ensure unit tests pass
        job.set(MRJobConfig.JOB_NAME, "JOB" + Utilities.randGen.nextInt());
    }
    try {
        MapredLocalWork localwork = mWork.getMapRedLocalWork();
        if (localwork != null && localwork.hasStagedAlias()) {
            if (!ShimLoader.getHadoopShims().isLocalMode(job)) {
                Path localPath = localwork.getTmpPath();
                Path hdfsPath = mWork.getTmpHDFSPath();
                FileSystem hdfs = hdfsPath.getFileSystem(job);
                FileSystem localFS = localPath.getFileSystem(job);
                FileStatus[] hashtableFiles = localFS.listStatus(localPath);
                int fileNumber = hashtableFiles.length;
                String[] fileNames = new String[fileNumber];
                for (int i = 0; i < fileNumber; i++) {
                    fileNames[i] = hashtableFiles[i].getPath().getName();
                }
                //package and compress all the hashtable files to an archive file
                String stageId = this.getId();
                String archiveFileName = Utilities.generateTarFileName(stageId);
                localwork.setStageID(stageId);
                CompressionUtils.tar(localPath.toUri().getPath(), fileNames, archiveFileName);
                Path archivePath = Utilities.generateTarPath(localPath, stageId);
                LOG.info("Archive " + hashtableFiles.length + " hash table files to " + archivePath);
                //upload archive file to hdfs
                Path hdfsFilePath = Utilities.generateTarPath(hdfsPath, stageId);
                short replication = (short) job.getInt("mapred.submit.replication", 10);
                hdfs.copyFromLocalFile(archivePath, hdfsFilePath);
                hdfs.setReplication(hdfsFilePath, replication);
                LOG.info("Upload 1 archive file  from" + archivePath + " to: " + hdfsFilePath);
                //add the archive file to distributed cache
                DistributedCache.createSymlink(job);
                DistributedCache.addCacheArchive(hdfsFilePath.toUri(), job);
                LOG.info("Add 1 archive file to distributed cache. Archive file: " + hdfsFilePath.toUri());
            }
        }
        work.configureJobConf(job);
        List<Path> inputPaths = Utilities.getInputPaths(job, mWork, emptyScratchDir, ctx, false);
        Utilities.setInputPaths(job, inputPaths);
        Utilities.setMapRedWork(job, work, ctx.getMRTmpPath());
        if (mWork.getSamplingType() > 0 && rWork != null && job.getNumReduceTasks() > 1) {
            try {
                handleSampling(ctx, mWork, job);
                job.setPartitionerClass(HiveTotalOrderPartitioner.class);
            } catch (IllegalStateException e) {
                console.printInfo("Not enough sampling data.. Rolling back to single reducer task");
                rWork.setNumReduceTasks(1);
                job.setNumReduceTasks(1);
            } catch (Exception e) {
                LOG.error("Sampling error", e);
                console.printError(e.toString(), "\n" + org.apache.hadoop.util.StringUtils.stringifyException(e));
                rWork.setNumReduceTasks(1);
                job.setNumReduceTasks(1);
            }
        }
        jc = new JobClient(job);
        // make this client wait if job tracker is not behaving well.
        Throttle.checkJobTracker(job, LOG);
        if (mWork.isGatheringStats() || (rWork != null && rWork.isGatheringStats())) {
            // initialize stats publishing table
            StatsPublisher statsPublisher;
            StatsFactory factory = StatsFactory.newFactory(job);
            if (factory != null) {
                statsPublisher = factory.getStatsPublisher();
                List<String> statsTmpDir = Utilities.getStatsTmpDirs(mWork, job);
                if (rWork != null) {
                    statsTmpDir.addAll(Utilities.getStatsTmpDirs(rWork, job));
                }
                StatsCollectionContext sc = new StatsCollectionContext(job);
                sc.setStatsTmpDirs(statsTmpDir);
                if (!statsPublisher.init(sc)) {
                    // creating stats table if not exists
                    if (HiveConf.getBoolVar(job, HiveConf.ConfVars.HIVE_STATS_RELIABLE)) {
                        throw new HiveException(ErrorMsg.STATSPUBLISHER_INITIALIZATION_ERROR.getErrorCodedMsg());
                    }
                }
            }
        }
        Utilities.createTmpDirs(job, mWork);
        Utilities.createTmpDirs(job, rWork);
        SessionState ss = SessionState.get();
        if (HiveConf.getVar(job, HiveConf.ConfVars.HIVE_EXECUTION_ENGINE).equals("tez") && ss != null) {
            TezSessionState session = ss.getTezSession();
            TezSessionPoolManager.getInstance().closeIfNotDefault(session, true);
        }
        HiveConfUtil.updateJobCredentialProviders(job);
        // Finally SUBMIT the JOB!
        if (driverContext.isShutdown()) {
            LOG.warn("Task was cancelled");
            return 5;
        }
        rj = jc.submitJob(job);
        if (driverContext.isShutdown()) {
            LOG.warn("Task was cancelled");
            if (rj != null) {
                rj.killJob();
                rj = null;
            }
            return 5;
        }
        this.jobID = rj.getJobID();
        updateStatusInQueryDisplay();
        returnVal = jobExecHelper.progress(rj, jc, ctx);
        success = (returnVal == 0);
    } catch (Exception e) {
        e.printStackTrace();
        setException(e);
        String mesg = " with exception '" + Utilities.getNameMessage(e) + "'";
        if (rj != null) {
            mesg = "Ended Job = " + rj.getJobID() + mesg;
        } else {
            mesg = "Job Submission failed" + mesg;
        }
        // Has to use full name to make sure it does not conflict with
        // org.apache.commons.lang.StringUtils
        console.printError(mesg, "\n" + org.apache.hadoop.util.StringUtils.stringifyException(e));
        success = false;
        returnVal = 1;
    } finally {
        Utilities.clearWork(job);
        try {
            if (ctxCreated) {
                ctx.clear();
            }
            if (rj != null) {
                if (returnVal != 0) {
                    rj.killJob();
                }
                jobID = rj.getID().toString();
            }
            if (jc != null) {
                jc.close();
            }
        } catch (Exception e) {
            LOG.warn("Failed while cleaning up ", e);
        } finally {
            HadoopJobExecHelper.runningJobs.remove(rj);
        }
    }
    // get the list of Dynamic partition paths
    try {
        if (rj != null) {
            if (mWork.getAliasToWork() != null) {
                for (Operator<? extends OperatorDesc> op : mWork.getAliasToWork().values()) {
                    op.jobClose(job, success);
                }
            }
            if (rWork != null) {
                rWork.getReducer().jobClose(job, success);
            }
        }
    } catch (Exception e) {
        // jobClose needs to execute successfully; otherwise fail the task
        if (success) {
            setException(e);
            success = false;
            returnVal = 3;
            String mesg = "Job Commit failed with exception '" + Utilities.getNameMessage(e) + "'";
            console.printError(mesg, "\n" + org.apache.hadoop.util.StringUtils.stringifyException(e));
        }
    }
    return (returnVal);
}
Also used : SessionState(org.apache.hadoop.hive.ql.session.SessionState) TezSessionState(org.apache.hadoop.hive.ql.exec.tez.TezSessionState) IOPrepareCache(org.apache.hadoop.hive.ql.io.IOPrepareCache) FileStatus(org.apache.hadoop.fs.FileStatus) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) JobClient(org.apache.hadoop.mapred.JobClient) StatsPublisher(org.apache.hadoop.hive.ql.stats.StatsPublisher) StatsFactory(org.apache.hadoop.hive.ql.stats.StatsFactory) FileSystem(org.apache.hadoop.fs.FileSystem) StatsCollectionContext(org.apache.hadoop.hive.ql.stats.StatsCollectionContext) Context(org.apache.hadoop.hive.ql.Context) CompilationOpContext(org.apache.hadoop.hive.ql.CompilationOpContext) DriverContext(org.apache.hadoop.hive.ql.DriverContext) Path(org.apache.hadoop.fs.Path) StatsCollectionContext(org.apache.hadoop.hive.ql.stats.StatsCollectionContext) BucketizedHiveInputFormat(org.apache.hadoop.hive.ql.io.BucketizedHiveInputFormat) ReduceWork(org.apache.hadoop.hive.ql.plan.ReduceWork) IOException(java.io.IOException) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) LogInitializationException(org.apache.hadoop.hive.common.LogUtils.LogInitializationException) IOException(java.io.IOException) TezSessionState(org.apache.hadoop.hive.ql.exec.tez.TezSessionState) MapWork(org.apache.hadoop.hive.ql.plan.MapWork) MapredLocalWork(org.apache.hadoop.hive.ql.plan.MapredLocalWork)
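One detail that is easy to miss in ExecDriver is the cleanup contract: if the task was cancelled or the job did not succeed, the RunningJob is killed, and the JobClient is always closed. The sketch below reduces that submit-and-clean-up shape to its core; the AtomicBoolean stands in for Hive's DriverContext.isShutdown() check and the return codes simply mirror the example, so treat it as an illustration rather than the Hive implementation.

import java.io.IOException;
import java.util.concurrent.atomic.AtomicBoolean;

import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.RunningJob;

public class SubmitWithCleanupSketch {

    /** Submit the job, kill it on cancellation or failure, and always close the client. */
    public static int run(JobConf job, AtomicBoolean isShutdown) {
        JobClient jc = null;
        RunningJob rj = null;
        int returnVal = 0;
        try {
            if (isShutdown.get()) {
                return 5;                 // cancelled before submission
            }
            jc = new JobClient(job);
            rj = jc.submitJob(job);
            if (isShutdown.get()) {       // cancelled after submission: kill the remote job
                rj.killJob();
                return 5;
            }
            rj.waitForCompletion();
            returnVal = rj.isSuccessful() ? 0 : 2;
        } catch (IOException e) {
            System.err.println("Job Submission failed with exception '" + e.getMessage() + "'");
            returnVal = 1;
        } finally {
            try {
                if (rj != null && returnVal != 0) {
                    rj.killJob();         // best-effort kill of a job that did not succeed
                }
                if (jc != null) {
                    jc.close();
                }
            } catch (IOException e) {
                System.err.println("Failed while cleaning up: " + e.getMessage());
            }
        }
        return returnVal;
    }
}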

Example 25 with JobClient

use of org.apache.hadoop.mapred.JobClient in project hive by apache.

the class MergeFileTask method execute.

/**
   * start a new map-reduce job to do the merge, almost the same as ExecDriver.
   */
@Override
public int execute(DriverContext driverContext) {
    Context ctx = driverContext.getCtx();
    boolean ctxCreated = false;
    RunningJob rj = null;
    int returnVal = 0;
    try {
        if (ctx == null) {
            ctx = new Context(job);
            ctxCreated = true;
        }
        HiveFileFormatUtils.prepareJobOutput(job);
        job.setInputFormat(work.getInputformatClass());
        job.setOutputFormat(HiveOutputFormatImpl.class);
        job.setMapperClass(MergeFileMapper.class);
        job.setMapOutputKeyClass(NullWritable.class);
        job.setMapOutputValueClass(NullWritable.class);
        job.setOutputKeyClass(NullWritable.class);
        job.setOutputValueClass(NullWritable.class);
        job.setNumReduceTasks(0);
        // create the temp directories
        Path outputPath = work.getOutputDir();
        Path tempOutPath = Utilities.toTempPath(outputPath);
        FileSystem fs = tempOutPath.getFileSystem(job);
        if (!fs.exists(tempOutPath)) {
            fs.mkdirs(tempOutPath);
        }
        ExecDriver.propagateSplitSettings(job, work);
        // set job name
        boolean noName = StringUtils.isEmpty(job.get(MRJobConfig.JOB_NAME));
        String jobName = null;
        if (noName && this.getQueryPlan() != null) {
            int maxlen = conf.getIntVar(HiveConf.ConfVars.HIVEJOBNAMELENGTH);
            jobName = Utilities.abbreviate(this.getQueryPlan().getQueryStr(), maxlen - 6);
        }
        if (noName) {
            // This is for a special case to ensure unit tests pass
            job.set(MRJobConfig.JOB_NAME, jobName != null ? jobName : "JOB" + Utilities.randGen.nextInt());
        }
        // add input path
        addInputPaths(job, work);
        // serialize work
        Utilities.setMapWork(job, work, ctx.getMRTmpPath(), true);
        // remove the pwd from the conf file so that the job tracker doesn't show it in its logs
        String pwd = HiveConf.getVar(job, HiveConf.ConfVars.METASTOREPWD);
        if (pwd != null) {
            HiveConf.setVar(job, HiveConf.ConfVars.METASTOREPWD, "HIVE");
        }
        // submit the job
        JobClient jc = new JobClient(job);
        String addedJars = Utilities.getResourceFiles(job, SessionState.ResourceType.JAR);
        if (!addedJars.isEmpty()) {
            job.set("tmpjars", addedJars);
        }
        // make this client wait if the job tracker is not behaving well.
        Throttle.checkJobTracker(job, LOG);
        // Finally SUBMIT the JOB!
        rj = jc.submitJob(job);
        this.jobID = rj.getJobID();
        returnVal = jobExecHelper.progress(rj, jc, ctx);
        success = (returnVal == 0);
    } catch (Exception e) {
        setException(e);
        String mesg = " with exception '" + Utilities.getNameMessage(e) + "'";
        if (rj != null) {
            mesg = "Ended Job = " + rj.getJobID() + mesg;
        } else {
            mesg = "Job Submission failed" + mesg;
        }
        // Has to use full name to make sure it does not conflict with
        // org.apache.commons.lang.StringUtils
        console.printError(mesg, "\n" + org.apache.hadoop.util.StringUtils.stringifyException(e));
        success = false;
        returnVal = 1;
    } finally {
        try {
            if (ctxCreated) {
                ctx.clear();
            }
            if (rj != null) {
                if (returnVal != 0) {
                    rj.killJob();
                }
            }
            // get the list of Dynamic partition paths
            if (rj != null) {
                if (work.getAliasToWork() != null) {
                    for (Operator<? extends OperatorDesc> op : work.getAliasToWork().values()) {
                        op.jobClose(job, success);
                    }
                }
            }
        } catch (Exception e) {
            // jobClose needs to execute successfully; otherwise fail the task
            LOG.warn("Job close failed ", e);
            if (success) {
                setException(e);
                success = false;
                returnVal = 3;
                String mesg = "Job Commit failed with exception '" + Utilities.getNameMessage(e) + "'";
                console.printError(mesg, "\n" + org.apache.hadoop.util.StringUtils.stringifyException(e));
            }
        } finally {
            HadoopJobExecHelper.runningJobs.remove(rj);
        }
    }
    return returnVal;
}
Also used : Context(org.apache.hadoop.hive.ql.Context) DriverContext(org.apache.hadoop.hive.ql.DriverContext) CompilationOpContext(org.apache.hadoop.hive.ql.CompilationOpContext) Path(org.apache.hadoop.fs.Path) FileSystem(org.apache.hadoop.fs.FileSystem) RunningJob(org.apache.hadoop.mapred.RunningJob) JobClient(org.apache.hadoop.mapred.JobClient) IOException(java.io.IOException)
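MergeFileTask stages its results in a temporary directory on the output filesystem before handing the JobConf to JobClient. That preparation step in isolation looks roughly like the sketch below; Hive actually derives the path with Utilities.toTempPath(), so the "_tmp." prefix here is an assumption used only to keep the sketch self-contained.

import java.io.IOException;

import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapred.JobConf;

public class TempOutputDirSketch {

    /** Ensure a temporary staging directory next to the final output exists. */
    public static Path prepareTempOutput(JobConf job, Path outputPath) throws IOException {
        // Assumed naming scheme; Hive's Utilities.toTempPath() is the real source of the path.
        Path tempOutPath = new Path(outputPath.getParent(), "_tmp." + outputPath.getName());
        FileSystem fs = tempOutPath.getFileSystem(job);
        if (!fs.exists(tempOutPath)) {
            fs.mkdirs(tempOutPath);
        }
        return tempOutPath;
    }
}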

Aggregations

JobClient (org.apache.hadoop.mapred.JobClient)47 Path (org.apache.hadoop.fs.Path)25 RunningJob (org.apache.hadoop.mapred.RunningJob)20 FileSystem (org.apache.hadoop.fs.FileSystem)18 JobConf (org.apache.hadoop.mapred.JobConf)18 IOException (java.io.IOException)16 Configuration (org.apache.hadoop.conf.Configuration)16 ClusterStatus (org.apache.hadoop.mapred.ClusterStatus)11 Date (java.util.Date)7 Text (org.apache.hadoop.io.Text)6 Counters (org.apache.hadoop.mapred.Counters)6 Test (org.junit.Test)6 DataOutputStream (java.io.DataOutputStream)5 FileStatus (org.apache.hadoop.fs.FileStatus)5 BufferedReader (java.io.BufferedReader)4 InputStreamReader (java.io.InputStreamReader)4 CompilationOpContext (org.apache.hadoop.hive.ql.CompilationOpContext)4 Context (org.apache.hadoop.hive.ql.Context)4 DriverContext (org.apache.hadoop.hive.ql.DriverContext)4 FileOutputFormat (org.apache.hadoop.mapreduce.lib.output.FileOutputFormat)4