Example 6 with CachingPrintStream

Use of org.apache.hadoop.hive.common.io.CachingPrintStream in the Apache Hive project.

From the class MapredLocalTask, the method executeInChildVM:

public int executeInChildVM(DriverContext driverContext) {
    // execute in child jvm
    try {
        // generate the cmd line to run in the child jvm
        Context ctx = driverContext.getCtx();
        String hiveJar = conf.getJar();
        String hadoopExec = conf.getVar(HiveConf.ConfVars.HADOOPBIN);
        conf.setVar(ConfVars.HIVEADDEDJARS, Utilities.getResourceFiles(conf, SessionState.ResourceType.JAR));
        // write out the plan to a local file
        Path planPath = new Path(ctx.getLocalTmpPath(), "plan.xml");
        MapredLocalWork plan = getWork();
        LOG.info("Generating plan file " + planPath.toString());
        OutputStream out = null;
        try {
            out = FileSystem.getLocal(conf).create(planPath);
            SerializationUtilities.serializePlan(plan, out);
            out.close();
            out = null;
        } finally {
            IOUtils.closeQuietly(out);
        }
        String isSilent = "true".equalsIgnoreCase(System.getProperty("test.silent")) ? "-nolog" : "";
        String jarCmd;
        jarCmd = hiveJar + " " + ExecDriver.class.getName();
        String hiveConfArgs = ExecDriver.generateCmdLine(conf, ctx);
        String cmdLine = hadoopExec + " jar " + jarCmd + " -localtask -plan " + planPath.toString() + " " + isSilent + " " + hiveConfArgs;
        String workDir = (new File(".")).getCanonicalPath();
        String files = Utilities.getResourceFiles(conf, SessionState.ResourceType.FILE);
        if (!files.isEmpty()) {
            cmdLine = cmdLine + " -files " + files;
            workDir = ctx.getLocalTmpPath().toUri().getPath();
            if (!(new File(workDir)).mkdir()) {
                throw new IOException("Cannot create tmp working dir: " + workDir);
            }
            for (String f : StringUtils.split(files, ',')) {
                Path p = new Path(f);
                String target = p.toUri().getPath();
                String link = workDir + Path.SEPARATOR + p.getName();
                if (FileUtil.symLink(target, link) != 0) {
                    throw new IOException("Cannot link to added file: " + target + " from: " + link);
                }
            }
        }
        // Inherit Java system variables
        String hadoopOpts;
        StringBuilder sb = new StringBuilder();
        Properties p = System.getProperties();
        for (String element : HIVE_SYS_PROP) {
            if (p.containsKey(element)) {
                sb.append(" -D" + element + "=" + p.getProperty(element));
            }
        }
        hadoopOpts = sb.toString();
        // Inherit the environment variables
        String[] env;
        Map<String, String> variables = new HashMap<String, String>(System.getenv());
        // The user can specify the hadoop memory
        // if ("local".equals(conf.getVar(HiveConf.ConfVars.HADOOPJT))) {
        // if we are running in local mode - then the amount of memory used
        // by the child jvm can no longer default to the memory used by the
        // parent jvm
        // int hadoopMem = conf.getIntVar(HiveConf.ConfVars.HIVEHADOOPMAXMEM);
        int hadoopMem = conf.getIntVar(HiveConf.ConfVars.HIVEHADOOPMAXMEM);
        if (hadoopMem == 0) {
            // remove env var that would default child jvm to use parent's memory
            // as default. child jvm would use default memory for a hadoop client
            variables.remove(HADOOP_MEM_KEY);
        } else {
            // user specified the memory for local mode hadoop run
            console.printInfo(" set heap size\t" + hadoopMem + "MB");
            variables.put(HADOOP_MEM_KEY, String.valueOf(hadoopMem));
        }
        // } else {
        // nothing to do - we are not running in local mode - only submitting
        // the job via a child process. in this case it's appropriate that the
        // child jvm use the same memory as the parent jvm
        // }
        // Set the HADOOP_USER_NAME env variable for the child process so that it
        // also runs with the Hadoop permissions of the user the job is running as.
        // Hadoop uses this only in unsecure (non-Kerberos) mode.
        String endUserName = Utils.getUGI().getShortUserName();
        LOG.debug("setting HADOOP_USER_NAME\t" + endUserName);
        variables.put("HADOOP_USER_NAME", endUserName);
        if (variables.containsKey(HADOOP_OPTS_KEY)) {
            variables.put(HADOOP_OPTS_KEY, variables.get(HADOOP_OPTS_KEY) + hadoopOpts);
        } else {
            variables.put(HADOOP_OPTS_KEY, hadoopOpts);
        }
        // HiveServer2 passes "-hiveconf hive.hadoop.classpath=%HIVE_LIB%"; combine it with any existing HADOOP_CLASSPATH.
        if (HiveConf.getVar(conf, HiveConf.ConfVars.HIVE_HADOOP_CLASSPATH) != null) {
            if (variables.containsKey("HADOOP_CLASSPATH")) {
                variables.put("HADOOP_CLASSPATH", variables.get("HADOOP_CLASSPATH") + ";" + HiveConf.getVar(conf, HiveConf.ConfVars.HIVE_HADOOP_CLASSPATH));
            } else {
                variables.put("HADOOP_CLASSPATH", HiveConf.getVar(conf, HiveConf.ConfVars.HIVE_HADOOP_CLASSPATH));
            }
        }
        if (variables.containsKey(MapRedTask.HIVE_DEBUG_RECURSIVE)) {
            MapRedTask.configureDebugVariablesForChildJVM(variables);
        }
        if (UserGroupInformation.isSecurityEnabled() && UserGroupInformation.isLoginKeytabBased()) {
            // If Kerberos security is enabled and HS2 doAs is enabled, additional
            // parameters need to be set so that the command runs as the intended user.
            secureDoAs = new SecureCmdDoAs(conf);
            secureDoAs.addEnv(variables);
        }
        // If HIVE_LOCAL_TASK_CHILD_OPTS is set, use it as HADOOP_CLIENT_OPTS (and
        // substitute it into HADOOP_OPTS) so the local task's child JVM can
        // have different settings from those of HiveServer2.
        if (variables.containsKey(HIVE_LOCAL_TASK_CHILD_OPTS_KEY)) {
            String childOpts = variables.get(HIVE_LOCAL_TASK_CHILD_OPTS_KEY);
            if (childOpts == null) {
                childOpts = "";
            }
            String clientOpts = variables.put(HADOOP_CLIENT_OPTS, childOpts);
            String tmp = variables.get(HADOOP_OPTS_KEY);
            if (tmp != null && !StringUtils.isBlank(clientOpts)) {
                tmp = tmp.replace(clientOpts, childOpts);
                variables.put(HADOOP_OPTS_KEY, tmp);
            }
        }
        env = new String[variables.size()];
        int pos = 0;
        for (Map.Entry<String, String> entry : variables.entrySet()) {
            String name = entry.getKey();
            String value = entry.getValue();
            env[pos++] = name + "=" + value;
            LOG.debug("Setting env: " + name + "=" + LogUtils.maskIfPassword(name, value));
        }
        LOG.info("Executing: " + cmdLine);
        // Run ExecDriver in another JVM
        executor = Runtime.getRuntime().exec(cmdLine, env, new File(workDir));
        CachingPrintStream errPrintStream = new CachingPrintStream(System.err);
        StreamPrinter outPrinter;
        StreamPrinter errPrinter;
        OperationLog operationLog = OperationLog.getCurrentOperationLog();
        if (operationLog != null) {
            outPrinter = new StreamPrinter(executor.getInputStream(), null, System.out, operationLog.getPrintStream());
            errPrinter = new StreamPrinter(executor.getErrorStream(), null, errPrintStream, operationLog.getPrintStream());
        } else {
            outPrinter = new StreamPrinter(executor.getInputStream(), null, System.out);
            errPrinter = new StreamPrinter(executor.getErrorStream(), null, errPrintStream);
        }
        outPrinter.start();
        errPrinter.start();
        int exitVal = jobExecHelper.progressLocal(executor, getId());
        // wait for stream threads to finish
        outPrinter.join();
        errPrinter.join();
        if (exitVal != 0) {
            LOG.error("Execution failed with exit status: " + exitVal);
            if (SessionState.get() != null) {
                SessionState.get().addLocalMapRedErrors(getId(), errPrintStream.getOutput());
            }
        } else {
            LOG.info("Execution completed successfully");
        }
        return exitVal;
    } catch (Exception e) {
        LOG.error("Exception: ", e);
        return (1);
    } finally {
        if (secureDoAs != null) {
            secureDoAs.close();
        }
    }
}
Also used: Context(org.apache.hadoop.hive.ql.Context), CompilationOpContext(org.apache.hadoop.hive.ql.CompilationOpContext), BucketMapJoinContext(org.apache.hadoop.hive.ql.plan.BucketMapJoinContext), DriverContext(org.apache.hadoop.hive.ql.DriverContext), Path(org.apache.hadoop.fs.Path), HashMap(java.util.HashMap), SecureCmdDoAs(org.apache.hadoop.hive.ql.exec.SecureCmdDoAs), OutputStream(java.io.OutputStream), OperationLog(org.apache.hadoop.hive.ql.session.OperationLog), IOException(java.io.IOException), Properties(java.util.Properties), HiveException(org.apache.hadoop.hive.ql.metadata.HiveException), MapJoinMemoryExhaustionException(org.apache.hadoop.hive.ql.exec.mapjoin.MapJoinMemoryExhaustionException), CachingPrintStream(org.apache.hadoop.hive.common.io.CachingPrintStream), StreamPrinter(org.apache.hive.common.util.StreamPrinter), MapredLocalWork(org.apache.hadoop.hive.ql.plan.MapredLocalWork), File(java.io.File), Map(java.util.Map)
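
Stripped of the Hive-specific setup, the pattern above is: start the child process, tee its stderr through a CachingPrintStream so the output is both echoed and cached, join the printer threads, and read the cache back with getOutput() if the process fails. The sketch below illustrates that pattern under stated assumptions; the command line ("hadoop version"), the class name ChildProcessErrorCapture, and the use of waitFor() in place of jobExecHelper.progressLocal() are illustrative only, while the CachingPrintStream and StreamPrinter calls mirror the ones visible in the example.

import java.io.File;

import org.apache.hadoop.hive.common.io.CachingPrintStream;
import org.apache.hive.common.util.StreamPrinter;

public class ChildProcessErrorCapture {
    public static void main(String[] args) throws Exception {
        // Hypothetical command; in MapredLocalTask this is the generated "hadoop jar ..." line.
        String cmdLine = "hadoop version";
        Process child = Runtime.getRuntime().exec(cmdLine, null, new File("."));

        // Wrap System.err so that everything the child writes to stderr is echoed
        // to the console and also cached for later inspection.
        CachingPrintStream errPrintStream = new CachingPrintStream(System.err);
        StreamPrinter outPrinter = new StreamPrinter(child.getInputStream(), null, System.out);
        StreamPrinter errPrinter = new StreamPrinter(child.getErrorStream(), null, errPrintStream);
        outPrinter.start();
        errPrinter.start();

        // Simplification: the real task tracks progress via jobExecHelper.progressLocal().
        int exitVal = child.waitFor();

        // Wait for the stream threads to drain before reading the cache.
        outPrinter.join();
        errPrinter.join();

        if (exitVal != 0) {
            // getOutput() returns the lines that were written through the stream,
            // the same lines MapredLocalTask hands to addLocalMapRedErrors().
            for (String line : errPrintStream.getOutput()) {
                System.out.println("captured stderr: " + line);
            }
        }
    }
}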

Example 7 with CachingPrintStream

Use of org.apache.hadoop.hive.common.io.CachingPrintStream in the Apache Hive project.

From the class CliDriver, the method run:

public int run(String[] args) throws Exception {
    OptionsProcessor oproc = new OptionsProcessor();
    if (!oproc.process_stage1(args)) {
        return 1;
    }
    // NOTE: It is critical to do this here so that log4j is reinitialized
    // before any of the other core hive classes are loaded
    boolean logInitFailed = false;
    String logInitDetailMessage;
    try {
        logInitDetailMessage = LogUtils.initHiveLog4j();
    } catch (LogInitializationException e) {
        logInitFailed = true;
        logInitDetailMessage = e.getMessage();
    }
    CliSessionState ss = new CliSessionState(new HiveConf(SessionState.class));
    ss.in = System.in;
    try {
        ss.out = new PrintStream(System.out, true, "UTF-8");
        ss.info = new PrintStream(System.err, true, "UTF-8");
        ss.err = new CachingPrintStream(System.err, true, "UTF-8");
    } catch (UnsupportedEncodingException e) {
        return 3;
    }
    if (!oproc.process_stage2(ss)) {
        return 2;
    }
    if (!ss.getIsSilent()) {
        if (logInitFailed) {
            System.err.println(logInitDetailMessage);
        } else {
            SessionState.getConsole().printInfo(logInitDetailMessage);
        }
    }
    // set all properties specified via command line
    HiveConf conf = ss.getConf();
    for (Map.Entry<Object, Object> item : ss.cmdProperties.entrySet()) {
        conf.set((String) item.getKey(), (String) item.getValue());
        ss.getOverriddenConfigurations().put((String) item.getKey(), (String) item.getValue());
    }
    // read prompt configuration and substitute variables.
    prompt = conf.getVar(HiveConf.ConfVars.CLIPROMPT);
    prompt = new VariableSubstitution(new HiveVariableSource() {

        @Override
        public Map<String, String> getHiveVariable() {
            return SessionState.get().getHiveVariables();
        }
    }).substitute(conf, prompt);
    prompt2 = spacesForString(prompt);
    if (HiveConf.getBoolVar(conf, ConfVars.HIVE_CLI_TEZ_SESSION_ASYNC)) {
        // Start the session in a fire-and-forget manner. When the asynchronously initialized parts of
        // the session are needed, the corresponding getters and other methods will wait as needed.
        SessionState.beginStart(ss, console);
    } else {
        SessionState.start(ss);
    }
    ss.updateThreadName();
    // execute cli driver work
    try {
        return executeDriver(ss, conf, oproc);
    } finally {
        ss.resetThreadName();
        ss.close();
    }
}
Also used: SessionState(org.apache.hadoop.hive.ql.session.SessionState), CliSessionState(org.apache.hadoop.hive.cli.CliSessionState), CachingPrintStream(org.apache.hadoop.hive.common.io.CachingPrintStream), PrintStream(java.io.PrintStream), VariableSubstitution(org.apache.hadoop.hive.conf.VariableSubstitution), HiveVariableSource(org.apache.hadoop.hive.conf.HiveVariableSource), UnsupportedEncodingException(java.io.UnsupportedEncodingException), OptionsProcessor(org.apache.hadoop.hive.cli.OptionsProcessor), LogInitializationException(org.apache.hadoop.hive.common.LogUtils.LogInitializationException), HiveConf(org.apache.hadoop.hive.conf.HiveConf), Map(java.util.Map)
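
In this example CachingPrintStream is constructed exactly like the plain PrintStream instances next to it (autoflush enabled, UTF-8 encoding) and assigned to ss.err, so downstream code keeps writing to an ordinary PrintStream while the lines are also recorded. The short sketch below, with an invented class name and sample messages, assumes only the three-argument constructor used here and the getOutput() accessor used in Example 6.

import java.io.PrintStream;

import org.apache.hadoop.hive.common.io.CachingPrintStream;

public class CachingPrintStreamDemo {
    public static void main(String[] args) throws Exception {
        // Same constructor arguments CliDriver uses for ss.err.
        CachingPrintStream err = new CachingPrintStream(System.err, true, "UTF-8");

        // It is a PrintStream, so existing code can write to it unchanged.
        PrintStream asPlainStream = err;
        asPlainStream.println("FAILED: sample error message");
        asPlainStream.println("another diagnostic line");

        // Assumed behavior (as exercised in Example 6): the printed lines remain
        // available afterwards, e.g. for error reporting in the session.
        for (String line : err.getOutput()) {
            System.out.println("cached: " + line);
        }
    }
}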

Aggregations

CachingPrintStream (org.apache.hadoop.hive.common.io.CachingPrintStream): 7
File (java.io.File): 5
SessionState (org.apache.hadoop.hive.ql.session.SessionState): 5
OutputStream (java.io.OutputStream): 4
PrintStream (java.io.PrintStream): 4
Map (java.util.Map): 4
CliSessionState (org.apache.hadoop.hive.cli.CliSessionState): 4
BufferedOutputStream (java.io.BufferedOutputStream): 2
FileOutputStream (java.io.FileOutputStream): 2
IOException (java.io.IOException): 2
UnsupportedEncodingException (java.io.UnsupportedEncodingException): 2
HashMap (java.util.HashMap): 2
Properties (java.util.Properties): 2
Path (org.apache.hadoop.fs.Path): 2
CliDriver (org.apache.hadoop.hive.cli.CliDriver): 2
OptionsProcessor (org.apache.hadoop.hive.cli.OptionsProcessor): 2
LogInitializationException (org.apache.hadoop.hive.common.LogUtils.LogInitializationException): 2
DigestPrintStream (org.apache.hadoop.hive.common.io.DigestPrintStream): 2
SortAndDigestPrintStream (org.apache.hadoop.hive.common.io.SortAndDigestPrintStream): 2
SortPrintStream (org.apache.hadoop.hive.common.io.SortPrintStream): 2