Search in sources :

Example 1 with TezSessionState

use of org.apache.hadoop.hive.ql.exec.tez.TezSessionState in project hive by apache.

the class QTestUtil method startSessionState.

/**
 * Creates a new {@link CliSessionState}, attaches it to the process's standard streams and
 * starts it as the current session.
 *
 * <p>While the session is being created and started, {@code hive.execution.engine} is forced
 * to {@code "mr"}; the previously configured value is put back before this method exits,
 * whether it returns normally or throws.
 *
 * <p>If a previous session exists, its Tez session (only when {@code canReuseSession} is set
 * and the cluster type is TEZ) or its Spark session (when the cluster type is SPARK) is moved
 * to the new session before the previous one is closed.
 *
 * @param canReuseSession whether an existing Tez session may be handed over to the new session
 * @return the newly started session state
 * @throws IOException declared by the original signature; propagated from the calls made here
 */
private CliSessionState startSessionState(boolean canReuseSession) throws IOException {
    HiveConf.setVar(conf, HiveConf.ConfVars.HIVE_AUTHENTICATOR_MANAGER, "org.apache.hadoop.hive.ql.security.DummyAuthenticator");
    // Remember the configured engine so it can be restored once the session is up.
    String execEngine = conf.get("hive.execution.engine");
    conf.set("hive.execution.engine", "mr");
    try {
        CliSessionState ss = createSessionState();
        assert ss != null;
        ss.in = System.in;
        ss.out = System.out;
        // Error output is deliberately routed to System.out as well.
        ss.err = System.out;
        SessionState oldSs = SessionState.get();
        if (oldSs != null && canReuseSession && clusterType.getCoreClusterType() == CoreClusterType.TEZ) {
            // Copy the tezSessionState from the old CliSessionState.
            tezSessionState = oldSs.getTezSession();
            ss.setTezSession(tezSessionState);
            // Detach it from the old session before closing that session.
            oldSs.setTezSession(null);
            oldSs.close();
        }
        if (oldSs != null && clusterType.getCoreClusterType() == CoreClusterType.SPARK) {
            sparkSession = oldSs.getSparkSession();
            ss.setSparkSession(sparkSession);
            oldSs.setSparkSession(null);
            oldSs.close();
        }
        // Never close System.out; only close a stream the old session opened itself.
        if (oldSs != null && oldSs.out != null && oldSs.out != System.out) {
            oldSs.out.close();
        }
        SessionState.start(ss);
        isSessionStateStarted = true;
        return ss;
    } finally {
        // Restore the caller's engine even if session creation or start-up failed, so a
        // failure here does not leave the shared configuration stuck on "mr".
        conf.set("hive.execution.engine", execEngine);
    }
}
Also used : TezSessionState(org.apache.hadoop.hive.ql.exec.tez.TezSessionState) CliSessionState(org.apache.hadoop.hive.cli.CliSessionState) SessionState(org.apache.hadoop.hive.ql.session.SessionState) CliSessionState(org.apache.hadoop.hive.cli.CliSessionState)

Example 2 with TezSessionState

use of org.apache.hadoop.hive.ql.exec.tez.TezSessionState in project hive by apache.

the class ResourceMaps method start.

/**
 * Makes {@code startSs} the current session state and, unless it is already flagged as
 * started, runs its start-up steps: history setup, session directories, temporary output
 * files and, when the execution engine is "tez" for a non-HiveServer query, the Tez session.
 *
 * @param startSs the session state to start
 * @param isAsync when true, an unopened Tez session is opened via {@code beginOpen} instead
 *                of the blocking {@code open}; an already-opening session is left as is
 * @param console passed through to {@code beginOpen} in the asynchronous case
 * @throws RuntimeException wrapping any checked exception raised during start-up
 */
private static synchronized void start(SessionState startSs, boolean isAsync, LogHelper console) {
    setCurrentSessionState(startSs);
    if (startSs.isStarted) {
        return;
    }
    startSs.isStarted = true;

    if (startSs.hiveHist == null) {
        boolean historyEnabled = startSs.getConf().getBoolVar(HiveConf.ConfVars.HIVE_SESSION_HISTORY_ENABLED);
        if (!historyEnabled) {
            // Hive history is disabled, create a no-op proxy
            startSs.hiveHist = HiveHistoryProxyHandler.getNoOpHiveHistoryProxy();
        } else {
            startSs.hiveHist = new HiveHistoryImpl(startSs);
        }
    }

    try {
        UserGroupInformation sessionUGI = Utils.getUGI();
        FileSystem.get(startSs.sessionConf);
        // Create scratch dirs for this session
        startSs.createSessionDirs(sessionUGI.getShortUserName());
        // Temp file containing results to be sent to HiveClient
        if (startSs.getTmpOutputFile() == null) {
            startSs.setTmpOutputFile(createTempFile(startSs.getConf()));
        }
        // Temp file containing error output to be sent to client
        if (startSs.getTmpErrOutputFile() == null) {
            startSs.setTmpErrOutputFile(createTempFile(startSs.getConf()));
        }
    } catch (RuntimeException e) {
        // Unchecked failures pass through untouched.
        throw e;
    } catch (Exception e) {
        // Every checked failure (including IOException from the temp files) is wrapped once.
        throw new RuntimeException(e);
    }

    String engine = HiveConf.getVar(startSs.getConf(), HiveConf.ConfVars.HIVE_EXECUTION_ENGINE);
    boolean needsTezSession = engine.equals("tez") && !startSs.isHiveServerQuery;
    if (!needsTezSession) {
        return;
    }

    try {
        if (startSs.tezSessionState == null) {
            startSs.setTezSession(new TezSessionState(startSs.getSessionId()));
        }
        TezSessionState tez = startSs.tezSessionState;
        if (tez.isOpen()) {
            return;
        }
        if (tez.isOpening()) {
            // Synchronous callers finish the pending open; asynchronous callers leave it running.
            if (!isAsync) {
                tez.endOpen();
            }
            return;
        }
        // Neither open nor opening.
        if (isAsync) {
            tez.beginOpen(startSs.sessionConf, null, console);
        } else {
            // should use conf on session start-up
            tez.open(startSs.sessionConf);
        }
    } catch (Exception e) {
        throw new RuntimeException(e);
    }
}
Also used : IOException(java.io.IOException) HiveHistoryImpl(org.apache.hadoop.hive.ql.history.HiveHistoryImpl) URISyntaxException(java.net.URISyntaxException) LockException(org.apache.hadoop.hive.ql.lockmgr.LockException) CancellationException(java.util.concurrent.CancellationException) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) IOException(java.io.IOException) UserGroupInformation(org.apache.hadoop.security.UserGroupInformation) TezSessionState(org.apache.hadoop.hive.ql.exec.tez.TezSessionState)

Example 3 with TezSessionState

use of org.apache.hadoop.hive.ql.exec.tez.TezSessionState in project hive by apache.

the class QTestUtil method cliInit.

/**
 * Sets up a fresh CLI session for the test {@code tname}: optionally recreates the test
 * sources, redirects the session's output and error streams to a file under {@code logDir},
 * hands over a reusable Tez or Spark session from the previous session state, starts the new
 * session and processes its init files.
 *
 * @param tname    name of the test (query file) being initialised
 * @param recreate when true, {@code cleanUp} and {@code createSources} are run first
 * @return absolute path of the file that receives the session's output
 * @throws Exception propagated from any of the set-up steps
 */
public String cliInit(String tname, boolean recreate) throws Exception {
    if (recreate) {
        cleanUp(tname);
        createSources(tname);
    }
    HiveConf.setVar(conf, HiveConf.ConfVars.HIVE_AUTHENTICATOR_MANAGER, "org.apache.hadoop.hive.ql.security.DummyAuthenticator");
    Utilities.clearWorkMap(conf);

    CliSessionState ss = createSessionState();
    assert ss != null;
    ss.in = System.in;

    // Work out the name of the file that captures this test's output.
    String outFileExtension = getOutFileExtension(tname);
    // TODO: why is this needed?
    String stdoutName = (outDir != null)
            ? new File(outDir, tname).getName().concat(outFileExtension)
            : tname + outFileExtension;
    File outf = new File(logDir, stdoutName);
    OutputStream fo = new BufferedOutputStream(new FileOutputStream(outf));

    // Pick the output stream flavour according to the query set the test belongs to.
    if (qSortQuerySet.contains(tname)) {
        ss.out = new SortPrintStream(fo, "UTF-8");
    } else if (qHashQuerySet.contains(tname)) {
        ss.out = new DigestPrintStream(fo, "UTF-8");
    } else if (qSortNHashQuerySet.contains(tname)) {
        ss.out = new SortAndDigestPrintStream(fo, "UTF-8");
    } else {
        ss.out = new PrintStream(fo, true, "UTF-8");
    }
    // Error output shares the same underlying file stream.
    ss.err = new CachingPrintStream(fo, true, "UTF-8");
    ss.setIsSilent(true);

    SessionState oldSs = SessionState.get();
    boolean canReuseSession = !qNoSessionReuseQuerySet.contains(tname);
    if (oldSs != null) {
        if (canReuseSession && clusterType.getCoreClusterType() == CoreClusterType.TEZ) {
            // Copy the tezSessionState from the old CliSessionState.
            tezSessionState = oldSs.getTezSession();
            oldSs.setTezSession(null);
            ss.setTezSession(tezSessionState);
            oldSs.close();
        }
        if (clusterType.getCoreClusterType() == CoreClusterType.SPARK) {
            sparkSession = oldSs.getSparkSession();
            ss.setSparkSession(sparkSession);
            oldSs.setSparkSession(null);
            oldSs.close();
        }
        // Close the previous session's output unless it is the process-wide System.out.
        if (oldSs.out != null && oldSs.out != System.out) {
            oldSs.out.close();
        }
    }

    SessionState.start(ss);
    cliDriver = new CliDriver();
    if (tname.equals("init_file.q")) {
        ss.initFiles.add(AbstractCliConfig.HIVE_ROOT + "/data/scripts/test_init_file.sql");
    }
    cliDriver.processInitFiles(ss);
    return outf.getAbsolutePath();
}
Also used : SortAndDigestPrintStream(org.apache.hadoop.hive.common.io.SortAndDigestPrintStream) CachingPrintStream(org.apache.hadoop.hive.common.io.CachingPrintStream) DigestPrintStream(org.apache.hadoop.hive.common.io.DigestPrintStream) SortPrintStream(org.apache.hadoop.hive.common.io.SortPrintStream) PrintStream(java.io.PrintStream) TezSessionState(org.apache.hadoop.hive.ql.exec.tez.TezSessionState) CliSessionState(org.apache.hadoop.hive.cli.CliSessionState) SessionState(org.apache.hadoop.hive.ql.session.SessionState) BufferedOutputStream(java.io.BufferedOutputStream) FileOutputStream(java.io.FileOutputStream) OutputStream(java.io.OutputStream) SortAndDigestPrintStream(org.apache.hadoop.hive.common.io.SortAndDigestPrintStream) DigestPrintStream(org.apache.hadoop.hive.common.io.DigestPrintStream) CliSessionState(org.apache.hadoop.hive.cli.CliSessionState) CachingPrintStream(org.apache.hadoop.hive.common.io.CachingPrintStream) FileOutputStream(java.io.FileOutputStream) SortAndDigestPrintStream(org.apache.hadoop.hive.common.io.SortAndDigestPrintStream) File(java.io.File) BufferedOutputStream(java.io.BufferedOutputStream) SortPrintStream(org.apache.hadoop.hive.common.io.SortPrintStream) CliDriver(org.apache.hadoop.hive.cli.CliDriver)

Example 4 with TezSessionState

use of org.apache.hadoop.hive.ql.exec.tez.TezSessionState in project hive by apache.

the class ExecDriver method execute.

/**
 * Execute a query plan using Hadoop.
 *
 * <p>Configures the job (output format, mapper/reducer classes, partitioner, input format),
 * ships staged hash-table files through the distributed cache when not in local mode,
 * optionally initialises stats publishing, submits the job and waits for it via
 * {@code jobExecHelper.progress}. Afterwards the operators' {@code jobClose} hooks are run.
 *
 * @param driverContext driver context supplying the execution {@code Context} and the
 *                      shutdown flag that is polled before and after submission
 * @return 0 on success; 5 when the task was cancelled or the scratch directory could not be
 *         created; 1 when submission or execution threw; 3 when the job succeeded but
 *         {@code jobClose} failed; otherwise the code returned by {@code jobExecHelper.progress}
 */
@SuppressWarnings({ "deprecation", "unchecked" })
@Override
public int execute(DriverContext driverContext) {
    IOPrepareCache ioPrepareCache = IOPrepareCache.get();
    ioPrepareCache.clear();
    boolean success = true;
    Context ctx = driverContext.getCtx();
    boolean ctxCreated = false;
    Path emptyScratchDir;
    JobClient jc = null;
    if (driverContext.isShutdown()) {
        LOG.warn("Task was cancelled");
        return 5;
    }
    MapWork mWork = work.getMapWork();
    ReduceWork rWork = work.getReduceWork();
    try {
        if (ctx == null) {
            // No context supplied by the driver: create one and remember to clear it later.
            ctx = new Context(job);
            ctxCreated = true;
        }
        emptyScratchDir = ctx.getMRTmpPath();
        FileSystem fs = emptyScratchDir.getFileSystem(job);
        fs.mkdirs(emptyScratchDir);
    } catch (IOException e) {
        // Route the stack trace through the logger instead of printing it to raw stderr.
        LOG.error("Error launching map-reduce job", e);
        console.printError("Error launching map-reduce job", "\n" + org.apache.hadoop.util.StringUtils.stringifyException(e));
        return 5;
    }
    HiveFileFormatUtils.prepareJobOutput(job);
    //See the javadoc on HiveOutputFormatImpl and HadoopShims.prepareJobOutput()
    job.setOutputFormat(HiveOutputFormatImpl.class);
    job.setMapperClass(ExecMapper.class);
    job.setMapOutputKeyClass(HiveKey.class);
    job.setMapOutputValueClass(BytesWritable.class);
    try {
        String partitioner = HiveConf.getVar(job, ConfVars.HIVEPARTITIONER);
        job.setPartitionerClass(JavaUtils.loadClass(partitioner));
    } catch (ClassNotFoundException e) {
        throw new RuntimeException(e.getMessage(), e);
    }
    propagateSplitSettings(job, mWork);
    // A map-only plan has no reduce work and therefore zero reduce tasks.
    job.setNumReduceTasks(rWork != null ? rWork.getNumReduceTasks().intValue() : 0);
    job.setReducerClass(ExecReducer.class);
    // set input format information if necessary
    setInputAttributes(job);
    // Turn on speculative execution for reducers
    boolean useSpeculativeExecReducers = HiveConf.getBoolVar(job, HiveConf.ConfVars.HIVESPECULATIVEEXECREDUCERS);
    job.setBoolean(MRJobConfig.REDUCE_SPECULATIVE, useSpeculativeExecReducers);
    String inpFormat = HiveConf.getVar(job, HiveConf.ConfVars.HIVEINPUTFORMAT);
    if (mWork.isUseBucketizedHiveInputFormat()) {
        inpFormat = BucketizedHiveInputFormat.class.getName();
    }
    LOG.info("Using " + inpFormat);
    try {
        job.setInputFormat(JavaUtils.loadClass(inpFormat));
    } catch (ClassNotFoundException e) {
        throw new RuntimeException(e.getMessage(), e);
    }
    // No-Op - we don't really write anything here ..
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    int returnVal = 0;
    boolean noName = StringUtils.isEmpty(job.get(MRJobConfig.JOB_NAME));
    if (noName) {
        // This is for a special case to ensure unit tests pass
        job.set(MRJobConfig.JOB_NAME, "JOB" + Utilities.randGen.nextInt());
    }
    try {
        MapredLocalWork localwork = mWork.getMapRedLocalWork();
        if (localwork != null && localwork.hasStagedAlias()) {
            if (!ShimLoader.getHadoopShims().isLocalMode(job)) {
                Path localPath = localwork.getTmpPath();
                Path hdfsPath = mWork.getTmpHDFSPath();
                FileSystem hdfs = hdfsPath.getFileSystem(job);
                FileSystem localFS = localPath.getFileSystem(job);
                FileStatus[] hashtableFiles = localFS.listStatus(localPath);
                int fileNumber = hashtableFiles.length;
                String[] fileNames = new String[fileNumber];
                for (int i = 0; i < fileNumber; i++) {
                    fileNames[i] = hashtableFiles[i].getPath().getName();
                }
                //package and compress all the hashtable files to an archive file
                String stageId = this.getId();
                String archiveFileName = Utilities.generateTarFileName(stageId);
                localwork.setStageID(stageId);
                CompressionUtils.tar(localPath.toUri().getPath(), fileNames, archiveFileName);
                Path archivePath = Utilities.generateTarPath(localPath, stageId);
                LOG.info("Archive " + hashtableFiles.length + " hash table files to " + archivePath);
                //upload archive file to hdfs
                Path hdfsFilePath = Utilities.generateTarPath(hdfsPath, stageId);
                short replication = (short) job.getInt("mapred.submit.replication", 10);
                hdfs.copyFromLocalFile(archivePath, hdfsFilePath);
                hdfs.setReplication(hdfsFilePath, replication);
                LOG.info("Upload 1 archive file from " + archivePath + " to: " + hdfsFilePath);
                //add the archive file to distributed cache
                DistributedCache.createSymlink(job);
                DistributedCache.addCacheArchive(hdfsFilePath.toUri(), job);
                LOG.info("Add 1 archive file to distributed cache. Archive file: " + hdfsFilePath.toUri());
            }
        }
        work.configureJobConf(job);
        List<Path> inputPaths = Utilities.getInputPaths(job, mWork, emptyScratchDir, ctx, false);
        Utilities.setInputPaths(job, inputPaths);
        Utilities.setMapRedWork(job, work, ctx.getMRTmpPath());
        if (mWork.getSamplingType() > 0 && rWork != null && job.getNumReduceTasks() > 1) {
            try {
                handleSampling(ctx, mWork, job);
                job.setPartitionerClass(HiveTotalOrderPartitioner.class);
            } catch (IllegalStateException e) {
                console.printInfo("Not enough sampling data.. Rolling back to single reducer task");
                rWork.setNumReduceTasks(1);
                job.setNumReduceTasks(1);
            } catch (Exception e) {
                // Any other sampling failure also falls back to a single reducer.
                LOG.error("Sampling error", e);
                console.printError(e.toString(), "\n" + org.apache.hadoop.util.StringUtils.stringifyException(e));
                rWork.setNumReduceTasks(1);
                job.setNumReduceTasks(1);
            }
        }
        jc = new JobClient(job);
        // make this client wait if job tracker is not behaving well.
        Throttle.checkJobTracker(job, LOG);
        if (mWork.isGatheringStats() || (rWork != null && rWork.isGatheringStats())) {
            // initialize stats publishing table
            StatsPublisher statsPublisher;
            StatsFactory factory = StatsFactory.newFactory(job);
            if (factory != null) {
                statsPublisher = factory.getStatsPublisher();
                List<String> statsTmpDir = Utilities.getStatsTmpDirs(mWork, job);
                if (rWork != null) {
                    statsTmpDir.addAll(Utilities.getStatsTmpDirs(rWork, job));
                }
                StatsCollectionContext sc = new StatsCollectionContext(job);
                sc.setStatsTmpDirs(statsTmpDir);
                if (!statsPublisher.init(sc)) {
                    // creating stats table if not exists
                    // A failed publisher init is fatal only when reliable stats are required.
                    if (HiveConf.getBoolVar(job, HiveConf.ConfVars.HIVE_STATS_RELIABLE)) {
                        throw new HiveException(ErrorMsg.STATSPUBLISHER_INITIALIZATION_ERROR.getErrorCodedMsg());
                    }
                }
            }
        }
        Utilities.createTmpDirs(job, mWork);
        Utilities.createTmpDirs(job, rWork);
        SessionState ss = SessionState.get();
        if (HiveConf.getVar(job, HiveConf.ConfVars.HIVE_EXECUTION_ENGINE).equals("tez") && ss != null) {
            // The configured engine is tez but this task runs on MR: close the session's Tez
            // session unless it is a default one.
            TezSessionState session = ss.getTezSession();
            TezSessionPoolManager.getInstance().closeIfNotDefault(session, true);
        }
        HiveConfUtil.updateJobCredentialProviders(job);
        // Finally SUBMIT the JOB!
        if (driverContext.isShutdown()) {
            LOG.warn("Task was cancelled");
            return 5;
        }
        rj = jc.submitJob(job);
        if (driverContext.isShutdown()) {
            // Cancelled while submitting: kill whatever got submitted.
            LOG.warn("Task was cancelled");
            if (rj != null) {
                rj.killJob();
                rj = null;
            }
            return 5;
        }
        this.jobID = rj.getJobID();
        updateStatusInQueryDisplay();
        returnVal = jobExecHelper.progress(rj, jc, ctx);
        success = (returnVal == 0);
    } catch (Exception e) {
        setException(e);
        String mesg = " with exception '" + Utilities.getNameMessage(e) + "'";
        if (rj != null) {
            mesg = "Ended Job = " + rj.getJobID() + mesg;
        } else {
            mesg = "Job Submission failed" + mesg;
        }
        // Route the stack trace through the logger instead of printing it to raw stderr.
        LOG.error(mesg, e);
        // Has to use full name to make sure it does not conflict with
        // org.apache.commons.lang.StringUtils
        console.printError(mesg, "\n" + org.apache.hadoop.util.StringUtils.stringifyException(e));
        success = false;
        returnVal = 1;
    } finally {
        Utilities.clearWork(job);
        try {
            if (ctxCreated) {
                // Only clear a context this method created itself.
                ctx.clear();
            }
            if (rj != null) {
                if (returnVal != 0) {
                    rj.killJob();
                }
                jobID = rj.getID().toString();
            }
            if (jc != null) {
                jc.close();
            }
        } catch (Exception e) {
            // Clean-up is best effort; a failure here must not change the task result.
            LOG.warn("Failed while cleaning up ", e);
        } finally {
            HadoopJobExecHelper.runningJobs.remove(rj);
        }
    }
    // get the list of Dynamic partition paths
    try {
        if (rj != null) {
            if (mWork.getAliasToWork() != null) {
                for (Operator<? extends OperatorDesc> op : mWork.getAliasToWork().values()) {
                    op.jobClose(job, success);
                }
            }
            if (rWork != null) {
                rWork.getReducer().jobClose(job, success);
            }
        }
    } catch (Exception e) {
        // jobClose needs to execute successfully otherwise fail task
        if (success) {
            setException(e);
            success = false;
            returnVal = 3;
            String mesg = "Job Commit failed with exception '" + Utilities.getNameMessage(e) + "'";
            console.printError(mesg, "\n" + org.apache.hadoop.util.StringUtils.stringifyException(e));
        }
    }
    return (returnVal);
}
Also used : SessionState(org.apache.hadoop.hive.ql.session.SessionState) TezSessionState(org.apache.hadoop.hive.ql.exec.tez.TezSessionState) IOPrepareCache(org.apache.hadoop.hive.ql.io.IOPrepareCache) FileStatus(org.apache.hadoop.fs.FileStatus) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) JobClient(org.apache.hadoop.mapred.JobClient) StatsPublisher(org.apache.hadoop.hive.ql.stats.StatsPublisher) StatsFactory(org.apache.hadoop.hive.ql.stats.StatsFactory) FileSystem(org.apache.hadoop.fs.FileSystem) StatsCollectionContext(org.apache.hadoop.hive.ql.stats.StatsCollectionContext) Context(org.apache.hadoop.hive.ql.Context) CompilationOpContext(org.apache.hadoop.hive.ql.CompilationOpContext) DriverContext(org.apache.hadoop.hive.ql.DriverContext) Path(org.apache.hadoop.fs.Path) StatsCollectionContext(org.apache.hadoop.hive.ql.stats.StatsCollectionContext) BucketizedHiveInputFormat(org.apache.hadoop.hive.ql.io.BucketizedHiveInputFormat) ReduceWork(org.apache.hadoop.hive.ql.plan.ReduceWork) IOException(java.io.IOException) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) LogInitializationException(org.apache.hadoop.hive.common.LogUtils.LogInitializationException) IOException(java.io.IOException) TezSessionState(org.apache.hadoop.hive.ql.exec.tez.TezSessionState) MapWork(org.apache.hadoop.hive.ql.plan.MapWork) MapredLocalWork(org.apache.hadoop.hive.ql.plan.MapredLocalWork)

Aggregations

TezSessionState (org.apache.hadoop.hive.ql.exec.tez.TezSessionState)4 SessionState (org.apache.hadoop.hive.ql.session.SessionState)3 IOException (java.io.IOException)2 CliSessionState (org.apache.hadoop.hive.cli.CliSessionState)2 HiveException (org.apache.hadoop.hive.ql.metadata.HiveException)2 BufferedOutputStream (java.io.BufferedOutputStream)1 File (java.io.File)1 FileOutputStream (java.io.FileOutputStream)1 OutputStream (java.io.OutputStream)1 PrintStream (java.io.PrintStream)1 URISyntaxException (java.net.URISyntaxException)1 CancellationException (java.util.concurrent.CancellationException)1 FileStatus (org.apache.hadoop.fs.FileStatus)1 FileSystem (org.apache.hadoop.fs.FileSystem)1 Path (org.apache.hadoop.fs.Path)1 CliDriver (org.apache.hadoop.hive.cli.CliDriver)1 LogInitializationException (org.apache.hadoop.hive.common.LogUtils.LogInitializationException)1 CachingPrintStream (org.apache.hadoop.hive.common.io.CachingPrintStream)1 DigestPrintStream (org.apache.hadoop.hive.common.io.DigestPrintStream)1 SortAndDigestPrintStream (org.apache.hadoop.hive.common.io.SortAndDigestPrintStream)1