Search in sources :

Example 1 with LogRedirector

Use of org.apache.hadoop.hive.common.log.LogRedirector in the Apache Hive project.

the class SparkClientImpl method startDriver.

/**
 * Launches the remote Spark driver as a child process and returns a daemon thread that
 * waits for that process to exit.
 *
 * <p>The method writes every job property (including the client id and the connection
 * secret) into an owner-readable temporary properties file, assembles a spark-submit
 * (or direct {@code SparkSubmit}) command line, runs it through {@code sh -c}, and
 * redirects the child's stdout/stderr into {@code LOG}. If the child exits with a
 * non-zero code, or the waiting thread is interrupted or fails, the pending client
 * registration is cancelled on {@code rpcServer}.
 *
 * @param rpcServer server the driver connects back to; its address and port are put on
 *                  the command line, and it is notified when the child fails
 * @param clientId  id stored in the properties file and used when cancelling the client
 * @param secret    connection secret, stored only in the properties file
 * @return the started daemon thread (named "Driver") that waits on the child process
 * @throws IOException if the properties file cannot be protected or written, if
 *                     spark-defaults.conf cannot be loaded, or if the process fails to start
 */
private Thread startDriver(final RpcServer rpcServer, final String clientId, final String secret) throws IOException {
    final String serverAddress = rpcServer.getAddress();
    final String serverPort = String.valueOf(rpcServer.getPort());
    // If a Spark installation is provided, use the spark-submit script. Otherwise, call the
    // SparkSubmit class directly, which has some caveats (like having to provide a proper
    // version of Guava on the classpath depending on the deploy mode).
    // Lookup order: client conf, then environment variable, then JVM system property.
    String sparkHome = Strings.emptyToNull(conf.get(SPARK_HOME_KEY));
    if (sparkHome == null) {
        sparkHome = Strings.emptyToNull(System.getenv(SPARK_HOME_ENV));
    }
    if (sparkHome == null) {
        sparkHome = Strings.emptyToNull(System.getProperty(SPARK_HOME_KEY));
    }
    String sparkLogDir = conf.get("hive.spark.log.dir");
    if (sparkLogDir == null) {
        sparkLogDir = (sparkHome == null) ? "./target/" : sparkHome + "/logs/";
    }
    // Extra JVM options taken from the environment, only when running on macOS.
    String osxTestOpts = "";
    if (Strings.nullToEmpty(System.getProperty("os.name")).toLowerCase().contains("mac")) {
        osxTestOpts = Strings.nullToEmpty(System.getenv(OSX_TEST_OPTS));
    }
    String driverJavaOpts = Joiner.on(" ").skipNulls().join("-Dhive.spark.log.dir=" + sparkLogDir, osxTestOpts, conf.get(DRIVER_OPTS_KEY));
    String executorJavaOpts = Joiner.on(" ").skipNulls().join("-Dhive.spark.log.dir=" + sparkLogDir, osxTestOpts, conf.get(EXECUTOR_OPTS_KEY));
    // Create a file with all the job properties to be read by spark-submit. Change the
    // file's permissions so that only the owner can read it. This avoids having the
    // connection secret show up in the child process's command line.
    File properties = File.createTempFile("spark-submit.", ".properties");
    if (!properties.setReadable(false) || !properties.setReadable(true, true)) {
        throw new IOException("Cannot change permissions of job properties file.");
    }
    properties.deleteOnExit();
    Properties allProps = new Properties();
    // first load the defaults from spark-defaults.conf if available
    try {
        URL sparkDefaultsUrl = Thread.currentThread().getContextClassLoader().getResource("spark-defaults.conf");
        if (sparkDefaultsUrl != null) {
            LOG.info("Loading spark defaults: " + sparkDefaultsUrl);
            allProps.load(new ByteArrayInputStream(Resources.toByteArray(sparkDefaultsUrl)));
        }
    } catch (Exception e) {
        String msg = "Exception trying to load spark-defaults.conf: " + e;
        throw new IOException(msg, e);
    }
    // then load the SparkClientImpl config, which overrides the defaults loaded above
    for (Map.Entry<String, String> e : conf.entrySet()) {
        allProps.put(e.getKey(), conf.get(e.getKey()));
    }
    allProps.put(SparkClientFactory.CONF_CLIENT_ID, clientId);
    allProps.put(SparkClientFactory.CONF_KEY_SECRET, secret);
    allProps.put(DRIVER_OPTS_KEY, driverJavaOpts);
    allProps.put(EXECUTOR_OPTS_KEY, executorJavaOpts);
    String isTesting = conf.get("spark.testing");
    if ("true".equalsIgnoreCase(isTesting)) {
        // In test mode, append HIVE_HADOOP_TEST_CLASSPATH to both the driver and the
        // executor extra classpath, inserting a path separator only when one is missing.
        String hiveHadoopTestClasspath = Strings.nullToEmpty(System.getenv("HIVE_HADOOP_TEST_CLASSPATH"));
        if (!hiveHadoopTestClasspath.isEmpty()) {
            String extraDriverClasspath = Strings.nullToEmpty((String) allProps.get(DRIVER_EXTRA_CLASSPATH));
            if (extraDriverClasspath.isEmpty()) {
                allProps.put(DRIVER_EXTRA_CLASSPATH, hiveHadoopTestClasspath);
            } else {
                extraDriverClasspath = extraDriverClasspath.endsWith(File.pathSeparator) ? extraDriverClasspath : extraDriverClasspath + File.pathSeparator;
                allProps.put(DRIVER_EXTRA_CLASSPATH, extraDriverClasspath + hiveHadoopTestClasspath);
            }
            String extraExecutorClasspath = Strings.nullToEmpty((String) allProps.get(EXECUTOR_EXTRA_CLASSPATH));
            if (extraExecutorClasspath.isEmpty()) {
                allProps.put(EXECUTOR_EXTRA_CLASSPATH, hiveHadoopTestClasspath);
            } else {
                extraExecutorClasspath = extraExecutorClasspath.endsWith(File.pathSeparator) ? extraExecutorClasspath : extraExecutorClasspath + File.pathSeparator;
                allProps.put(EXECUTOR_EXTRA_CLASSPATH, extraExecutorClasspath + hiveHadoopTestClasspath);
            }
        }
    }
    // try-with-resources: the writer is always closed, and a failure in close() cannot
    // hide a failure thrown by store().
    try (Writer writer = new OutputStreamWriter(new FileOutputStream(properties), Charsets.UTF_8)) {
        allProps.store(writer, "Spark Context configuration");
    }
    // Define how to pass options to the child process. If launching in client (or local)
    // mode, the driver options need to be passed directly on the command line. Otherwise,
    // SparkSubmit will take care of that for us.
    String master = conf.get("spark.master");
    Preconditions.checkArgument(master != null, "spark.master is not defined.");
    String deployMode = conf.get("spark.submit.deployMode");
    List<String> argv = Lists.newLinkedList();
    if (sparkHome != null) {
        argv.add(new File(sparkHome, "bin/spark-submit").getAbsolutePath());
    } else {
        LOG.info("No spark.home provided, calling SparkSubmit directly.");
        argv.add(new File(System.getProperty("java.home"), "bin/java").getAbsolutePath());
        if (master.startsWith("local") || master.startsWith("mesos") || SparkClientUtilities.isYarnClientMode(master, deployMode) || master.startsWith("spark")) {
            String mem = conf.get("spark.driver.memory");
            if (mem != null) {
                argv.add("-Xms" + mem);
                argv.add("-Xmx" + mem);
            }
            String cp = conf.get("spark.driver.extraClassPath");
            if (cp != null) {
                argv.add("-classpath");
                argv.add(cp);
            }
            String libPath = conf.get("spark.driver.extraLibPath");
            if (libPath != null) {
                argv.add("-Djava.library.path=" + libPath);
            }
            String extra = conf.get(DRIVER_OPTS_KEY);
            if (extra != null) {
                // Split on single spaces and drop the empty tokens left by repeated spaces.
                for (String opt : extra.split("[ ]")) {
                    String trimmed = opt.trim();
                    if (!trimmed.isEmpty()) {
                        argv.add(trimmed);
                    }
                }
            }
        }
        argv.add("org.apache.spark.deploy.SparkSubmit");
    }
    if (SparkClientUtilities.isYarnClusterMode(master, deployMode)) {
        String executorCores = conf.get("spark.executor.cores");
        if (executorCores != null) {
            argv.add("--executor-cores");
            argv.add(executorCores);
        }
        String executorMemory = conf.get("spark.executor.memory");
        if (executorMemory != null) {
            argv.add("--executor-memory");
            argv.add(executorMemory);
        }
        String numOfExecutors = conf.get("spark.executor.instances");
        if (numOfExecutors != null) {
            argv.add("--num-executors");
            argv.add(numOfExecutors);
        }
    }
    // Kerberos: when doAs is enabled, prefix the command with a kinit call; otherwise pass
    // the principal/keytab to spark-submit so Spark can renew delegation tokens for a
    // long-running application.
    if ("kerberos".equals(hiveConf.get(HADOOP_SECURITY_AUTHENTICATION))) {
        String principal = SecurityUtil.getServerPrincipal(hiveConf.getVar(ConfVars.HIVE_SERVER2_KERBEROS_PRINCIPAL), "0.0.0.0");
        String keyTabFile = hiveConf.getVar(ConfVars.HIVE_SERVER2_KERBEROS_KEYTAB);
        if (StringUtils.isNotBlank(principal) && StringUtils.isNotBlank(keyTabFile)) {
            if (hiveConf.getBoolVar(HiveConf.ConfVars.HIVE_SERVER2_ENABLE_DOAS)) {
                List<String> kinitArgv = Lists.newLinkedList();
                kinitArgv.add("kinit");
                kinitArgv.add(principal);
                kinitArgv.add("-k");
                kinitArgv.add("-t");
                // The trailing ";" ends the kinit command once argv is joined with spaces
                // and handed to "sh -c" below.
                kinitArgv.add(keyTabFile + ";");
                kinitArgv.addAll(argv);
                argv = kinitArgv;
            } else {
                // if doAs is not enabled, we pass the principal/keytab to spark-submit in order to
                // support the possible delegation token renewal in Spark
                argv.add("--principal");
                argv.add(principal);
                argv.add("--keytab");
                argv.add(keyTabFile);
            }
        }
    }
    if (hiveConf.getBoolVar(HiveConf.ConfVars.HIVE_SERVER2_ENABLE_DOAS)) {
        try {
            String currentUser = Utils.getUGI().getShortUserName();
            // do not do impersonation in CLI mode
            if (!currentUser.equals(System.getProperty("user.name"))) {
                LOG.info("Attempting impersonation of " + currentUser);
                argv.add("--proxy-user");
                argv.add(currentUser);
            }
        } catch (Exception e) {
            String msg = "Cannot obtain username: " + e;
            throw new IllegalStateException(msg, e);
        }
    }
    String regStr = conf.get("spark.kryo.registrator");
    if (HIVE_KRYO_REG_NAME.equals(regStr)) {
        argv.add("--jars");
        argv.add(SparkClientUtilities.findKryoRegistratorJar(hiveConf));
    }
    argv.add("--properties-file");
    argv.add(properties.getAbsolutePath());
    argv.add("--class");
    argv.add(RemoteDriver.class.getName());
    // Use the jar that contains this class when it can be located, else "spark-internal".
    String jar = "spark-internal";
    if (SparkContext.jarOfClass(this.getClass()).isDefined()) {
        jar = SparkContext.jarOfClass(this.getClass()).get();
    }
    argv.add(jar);
    argv.add("--remote-host");
    argv.add(serverAddress);
    argv.add("--remote-port");
    argv.add(serverPort);
    // The keys in RpcConfiguration.HIVE_SPARK_RSC_CONFIGS are passed as --conf arguments,
    // as --properties-file contains the spark.* keys that are meant for SparkConf object.
    for (String hiveSparkConfKey : RpcConfiguration.HIVE_SPARK_RSC_CONFIGS) {
        String value = RpcConfiguration.getValue(hiveConf, hiveSparkConfKey);
        argv.add("--conf");
        argv.add(String.format("%s=%s", hiveSparkConfKey, value));
    }
    String cmd = Joiner.on(" ").join(argv);
    LOG.info("Running client driver with argv: {}", cmd);
    ProcessBuilder pb = new ProcessBuilder("sh", "-c", cmd);
    // Prevent hive configurations from being visible in Spark.
    pb.environment().remove("HIVE_HOME");
    pb.environment().remove("HIVE_CONF_DIR");
    // Add credential provider password to the child process's environment
    // In case of Spark the credential provider location is provided in the jobConf when the job is submitted
    String password = getSparkJobCredentialProviderPassword();
    if (password != null) {
        pb.environment().put(Constants.HADOOP_CREDENTIAL_PASSWORD_ENVVAR, password);
    }
    if (isTesting != null) {
        pb.environment().put("SPARK_TESTING", isTesting);
    }
    final Process child = pb.start();
    String threadName = Thread.currentThread().getName();
    // stderr lines are collected here so they can be reported if the child fails.
    final List<String> childErrorLog = Collections.synchronizedList(new ArrayList<String>());
    // The redirectors query this callback, which reports the enclosing isAlive field.
    final LogRedirector.LogSourceCallback callback = () -> isAlive;
    LogRedirector.redirect("RemoteDriver-stdout-redir-" + threadName, new LogRedirector(child.getInputStream(), LOG, callback));
    LogRedirector.redirect("RemoteDriver-stderr-redir-" + threadName, new LogRedirector(child.getErrorStream(), LOG, childErrorLog, callback));
    Runnable runnable = new Runnable() {

        @Override
        public void run() {
            try {
                int exitCode = child.waitFor();
                if (exitCode != 0) {
                    StringBuilder errStr = new StringBuilder();
                    // Iteration over a synchronizedList must hold the list's own lock.
                    synchronized (childErrorLog) {
                        for (String line : childErrorLog) {
                            errStr.append(line).append('\n');
                        }
                    }
                    LOG.warn("Child process exited with code {}", exitCode);
                    rpcServer.cancelClient(clientId, "Child process (spark-submit) exited before connecting back with error log " + errStr.toString());
                }
            } catch (InterruptedException ie) {
                LOG.warn("Thread waiting on the child process (spark-submit) is interrupted, killing the child process.");
                rpcServer.cancelClient(clientId, "Thread waiting on the child process (spark-submit) is interrupted");
                // Thread.interrupted() reads and clears the interrupt flag.
                // NOTE(review): the flag is normally already clear once InterruptedException
                // has been thrown, so this looks like a no-op - confirm the intent.
                Thread.interrupted();
                child.destroy();
            } catch (Exception e) {
                String errMsg = "Exception while waiting for child process (spark-submit)";
                LOG.warn(errMsg, e);
                rpcServer.cancelClient(clientId, errMsg);
            }
        }
    };
    Thread thread = new Thread(runnable);
    thread.setDaemon(true);
    thread.setName("Driver");
    thread.start();
    return thread;
}
Also used : Properties(java.util.Properties) URL(java.net.URL) Iterator(java.util.Iterator) LogRedirector(org.apache.hadoop.hive.common.log.LogRedirector) IOException(java.io.IOException) TimeoutException(java.util.concurrent.TimeoutException) SparkException(org.apache.spark.SparkException) IOException(java.io.IOException) ByteArrayInputStream(java.io.ByteArrayInputStream) FileOutputStream(java.io.FileOutputStream) OutputStreamWriter(java.io.OutputStreamWriter) File(java.io.File) Map(java.util.Map) OutputStreamWriter(java.io.OutputStreamWriter) Writer(java.io.Writer)

Example 2 with LogRedirector

Use of org.apache.hadoop.hive.common.log.LogRedirector in the Apache Hive project.

the class MapredLocalTask method executeInChildVM.

/**
 * Runs the local map-reduce work in a separate JVM started through the hadoop executable.
 *
 * <p>The work plan is serialized to {@code plan.xml} under the context's local tmp path,
 * a {@code hadoop jar ... ExecDriver -localtask} command line is built, the parent's
 * environment is copied and adjusted (heap size, HADOOP_USER_NAME, HADOOP_OPTS,
 * HADOOP_CLASSPATH, client opts), and the child process is started. Its output is both
 * redirected to {@code LOG} and printed to System.out / System.err.
 *
 * @return the value returned by {@code jobExecHelper.progressLocal} for the child
 *         process, or 1 if any exception is thrown while preparing or running it
 */
private int executeInChildVM() {
    // execute in child jvm
    try {
        // generate the cmd line to run in the child jvm
        String hiveJar = conf.getJar();
        String hadoopExec = conf.getVar(HiveConf.ConfVars.HADOOPBIN);
        conf.setVar(ConfVars.HIVEADDEDJARS, Utilities.getResourceFiles(conf, SessionState.ResourceType.JAR));
        // write out the plan to a local file
        Path planPath = new Path(context.getLocalTmpPath(), "plan.xml");
        MapredLocalWork plan = getWork();
        LOG.info("Generating plan file " + planPath.toString());
        // try-with-resources closes the stream on every path; a close() failure after a
        // successful write still propagates to the outer catch.
        try (OutputStream out = FileSystem.getLocal(conf).create(planPath)) {
            SerializationUtilities.serializePlan(plan, out);
        }
        String isSilent = "true".equalsIgnoreCase(System.getProperty("test.silent")) ? "-nolog" : "";
        String libJars = ExecDriver.getResource(conf, ResourceType.JAR);
        String libJarsOption = StringUtils.isEmpty(libJars) ? " " : " -libjars " + libJars + " ";
        String jarCmd = hiveJar + " " + ExecDriver.class.getName() + libJarsOption;
        String hiveConfArgs = ExecDriver.generateCmdLine(conf, context);
        String cmdLine = hadoopExec + " jar " + jarCmd + " -localtask -plan " + planPath.toString() + " " + isSilent + " " + hiveConfArgs;
        String workDir = (new File(".")).getCanonicalPath();
        String files = Utilities.getResourceFiles(conf, SessionState.ResourceType.FILE);
        if (!files.isEmpty()) {
            // When resource files were added, run the child from the local tmp dir and
            // symlink each file into it.
            cmdLine = cmdLine + " -files " + files;
            workDir = context.getLocalTmpPath().toUri().getPath();
            if (!(new File(workDir)).mkdir()) {
                throw new IOException("Cannot create tmp working dir: " + workDir);
            }
            for (String f : StringUtils.split(files, ',')) {
                Path p = new Path(f);
                String target = p.toUri().getPath();
                String link = workDir + Path.SEPARATOR + p.getName();
                if (FileUtil.symLink(target, link) != 0) {
                    throw new IOException("Cannot link to added file: " + target + " from: " + link);
                }
            }
        }
        // Inherit Java system variables: every property named in HIVE_SYS_PROP that is
        // set in this JVM becomes a -D option for the child.
        StringBuilder sb = new StringBuilder();
        Properties sysProps = System.getProperties();
        for (String element : HIVE_SYS_PROP) {
            if (sysProps.containsKey(element)) {
                sb.append(" -D").append(element).append('=').append(sysProps.getProperty(element));
            }
        }
        String hadoopOpts = sb.toString();
        // Inherit the environment variables
        Map<String, String> variables = new HashMap<String, String>(System.getenv());
        // The user can specify the hadoop memory.
        // NOTE: this handling used to be guarded by a check that HADOOPJT is "local"; that
        // guard is commented out in the original code, so it applies unconditionally.
        int hadoopMem = conf.getIntVar(HiveConf.ConfVars.HIVEHADOOPMAXMEM);
        if (hadoopMem == 0) {
            // remove env var that would default child jvm to use parent's memory
            // as default. child jvm would use default memory for a hadoop client
            variables.remove(HADOOP_MEM_KEY);
        } else {
            // user specified the memory for local mode hadoop run
            console.printInfo(" set heap size\t" + hadoopMem + "MB");
            variables.put(HADOOP_MEM_KEY, String.valueOf(hadoopMem));
        }
        // Set HADOOP_USER_NAME env variable for child process, so that
        // it also runs with hadoop permissions for the user the job is running as
        // This will be used by hadoop only in unsecure(/non kerberos) mode
        String endUserName = Utils.getUGI().getShortUserName();
        LOG.debug("setting HADOOP_USER_NAME\t" + endUserName);
        variables.put("HADOOP_USER_NAME", endUserName);
        if (variables.containsKey(HADOOP_OPTS_KEY)) {
            variables.put(HADOOP_OPTS_KEY, variables.get(HADOOP_OPTS_KEY) + hadoopOpts);
        } else {
            variables.put(HADOOP_OPTS_KEY, hadoopOpts);
        }
        // Hiveserver2 using "-hiveconf hive.hadoop.classpath=%HIVE_LIB%". This is to combine path(s).
        String hiveHadoopClasspath = HiveConf.getVar(conf, HiveConf.ConfVars.HIVE_HADOOP_CLASSPATH);
        if (hiveHadoopClasspath != null) {
            if (variables.containsKey("HADOOP_CLASSPATH")) {
                // Join with the platform path separator: ";" is only valid on Windows and
                // would fuse the two entries into one bogus path elsewhere.
                variables.put("HADOOP_CLASSPATH", variables.get("HADOOP_CLASSPATH") + File.pathSeparator + hiveHadoopClasspath);
            } else {
                variables.put("HADOOP_CLASSPATH", hiveHadoopClasspath);
            }
        }
        if (variables.containsKey(MapRedTask.HIVE_DEBUG_RECURSIVE)) {
            MapRedTask.configureDebugVariablesForChildJVM(variables);
        }
        if (UserGroupInformation.isSecurityEnabled() && UserGroupInformation.isLoginKeytabBased()) {
            // If kerberos security is enabled, and HS2 doAs is enabled,
            // then additional params need to be set so that the command is run as
            // intended user
            secureDoAs = new SecureCmdDoAs(conf);
            secureDoAs.addEnv(variables);
        }
        // If HIVE_LOCAL_TASK_CHILD_OPTS_KEY is present, its value replaces
        // HADOOP_CLIENT_OPTS, and any occurrence of the previous client opts inside
        // HADOOP_OPTS is replaced too, so that the local task JVM can
        // have different settings from those of HiveServer2.
        if (variables.containsKey(HIVE_LOCAL_TASK_CHILD_OPTS_KEY)) {
            String childOpts = variables.get(HIVE_LOCAL_TASK_CHILD_OPTS_KEY);
            if (childOpts == null) {
                childOpts = "";
            }
            // put() returns the previous HADOOP_CLIENT_OPTS value (null if there was none).
            String clientOpts = variables.put(HADOOP_CLIENT_OPTS, childOpts);
            String tmp = variables.get(HADOOP_OPTS_KEY);
            if (tmp != null && !StringUtils.isBlank(clientOpts)) {
                tmp = tmp.replace(clientOpts, childOpts);
                variables.put(HADOOP_OPTS_KEY, tmp);
            }
        }
        // Flatten the map into the NAME=value array expected by Runtime.exec.
        String[] env = new String[variables.size()];
        int pos = 0;
        for (Map.Entry<String, String> entry : variables.entrySet()) {
            String name = entry.getKey();
            String value = entry.getValue();
            env[pos++] = name + "=" + value;
            LOG.debug("Setting env: " + name + "=" + LogUtils.maskIfPassword(name, value));
        }
        LOG.info("Executing: " + cmdLine);
        // Run ExecDriver in another JVM
        executor = Runtime.getRuntime().exec(cmdLine, env, new File(workDir));
        final LogRedirector.LogSourceCallback callback = () -> executor.isAlive();
        LogRedirector.redirect(Thread.currentThread().getName() + "-LocalTask-" + getName() + "-stdout", new LogRedirector(executor.getInputStream(), LOG, callback));
        LogRedirector.redirect(Thread.currentThread().getName() + "-LocalTask-" + getName() + "-stderr", new LogRedirector(executor.getErrorStream(), LOG, callback));
        // NOTE(review): the StreamPrinters below read the same child streams as the
        // LogRedirectors above - confirm that having two readers per stream is intended.
        CachingPrintStream errPrintStream = new CachingPrintStream(System.err);
        StreamPrinter outPrinter = new StreamPrinter(executor.getInputStream(), null, System.out);
        StreamPrinter errPrinter = new StreamPrinter(executor.getErrorStream(), null, errPrintStream);
        outPrinter.start();
        errPrinter.start();
        int exitVal = jobExecHelper.progressLocal(executor, getId());
        // wait for stream threads to finish
        outPrinter.join();
        errPrinter.join();
        if (exitVal != 0) {
            LOG.error("Execution failed with exit status: " + exitVal);
            if (SessionState.get() != null) {
                SessionState.get().addLocalMapRedErrors(getId(), errPrintStream.getOutput());
            }
        } else {
            LOG.info("Execution completed successfully");
        }
        return exitVal;
    } catch (Exception e) {
        LOG.error("Exception: ", e);
        return 1;
    } finally {
        if (secureDoAs != null) {
            secureDoAs.close();
        }
    }
}
Also used : Path(org.apache.hadoop.fs.Path) LogRedirector(org.apache.hadoop.hive.common.log.LogRedirector) HashMap(java.util.HashMap) SecureCmdDoAs(org.apache.hadoop.hive.ql.exec.SecureCmdDoAs) OutputStream(java.io.OutputStream) IOException(java.io.IOException) Properties(java.util.Properties) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) IOException(java.io.IOException) CachingPrintStream(org.apache.hadoop.hive.common.io.CachingPrintStream) StreamPrinter(org.apache.hive.common.util.StreamPrinter) MapredLocalWork(org.apache.hadoop.hive.ql.plan.MapredLocalWork) File(java.io.File) Map(java.util.Map) HashMap(java.util.HashMap)

Example 3 with LogRedirector

Use of org.apache.hadoop.hive.common.log.LogRedirector in the Apache Hive project.

the class MapredLocalTask method executeInChildVM.

/**
 * Runs the local map-reduce work in a separate JVM started through the hadoop executable.
 *
 * <p>The work plan is serialized to {@code plan.xml} under the local tmp path of the
 * driver context's {@code Context}, a {@code hadoop jar ... ExecDriver -localtask}
 * command line is built, the parent's environment is copied and adjusted (heap size,
 * HADOOP_USER_NAME, HADOOP_OPTS, HADOOP_CLASSPATH, client opts), and the child process
 * is started. Its output is both redirected to {@code LOG} and printed to
 * System.out / System.err.
 *
 * @param driverContext supplies the {@code Context} used for the local tmp path and
 *                      for generating the hive conf arguments
 * @return the value returned by {@code jobExecHelper.progressLocal} for the child
 *         process, or 1 if any exception is thrown while preparing or running it
 */
public int executeInChildVM(DriverContext driverContext) {
    // execute in child jvm
    try {
        // generate the cmd line to run in the child jvm
        Context ctx = driverContext.getCtx();
        String hiveJar = conf.getJar();
        String hadoopExec = conf.getVar(HiveConf.ConfVars.HADOOPBIN);
        conf.setVar(ConfVars.HIVEADDEDJARS, Utilities.getResourceFiles(conf, SessionState.ResourceType.JAR));
        // write out the plan to a local file
        Path planPath = new Path(ctx.getLocalTmpPath(), "plan.xml");
        MapredLocalWork plan = getWork();
        LOG.info("Generating plan file " + planPath.toString());
        // try-with-resources closes the stream on every path; a close() failure after a
        // successful write still propagates to the outer catch.
        try (OutputStream out = FileSystem.getLocal(conf).create(planPath)) {
            SerializationUtilities.serializePlan(plan, out);
        }
        String isSilent = "true".equalsIgnoreCase(System.getProperty("test.silent")) ? "-nolog" : "";
        String libJars = ExecDriver.getResource(conf, ResourceType.JAR);
        String libJarsOption = StringUtils.isEmpty(libJars) ? " " : " -libjars " + libJars + " ";
        String jarCmd = hiveJar + " " + ExecDriver.class.getName() + libJarsOption;
        String hiveConfArgs = ExecDriver.generateCmdLine(conf, ctx);
        String cmdLine = hadoopExec + " jar " + jarCmd + " -localtask -plan " + planPath.toString() + " " + isSilent + " " + hiveConfArgs;
        String workDir = (new File(".")).getCanonicalPath();
        String files = Utilities.getResourceFiles(conf, SessionState.ResourceType.FILE);
        if (!files.isEmpty()) {
            // When resource files were added, run the child from the local tmp dir and
            // symlink each file into it.
            cmdLine = cmdLine + " -files " + files;
            workDir = ctx.getLocalTmpPath().toUri().getPath();
            if (!(new File(workDir)).mkdir()) {
                throw new IOException("Cannot create tmp working dir: " + workDir);
            }
            for (String f : StringUtils.split(files, ',')) {
                Path p = new Path(f);
                String target = p.toUri().getPath();
                String link = workDir + Path.SEPARATOR + p.getName();
                if (FileUtil.symLink(target, link) != 0) {
                    throw new IOException("Cannot link to added file: " + target + " from: " + link);
                }
            }
        }
        // Inherit Java system variables: every property named in HIVE_SYS_PROP that is
        // set in this JVM becomes a -D option for the child.
        StringBuilder sb = new StringBuilder();
        Properties sysProps = System.getProperties();
        for (String element : HIVE_SYS_PROP) {
            if (sysProps.containsKey(element)) {
                sb.append(" -D").append(element).append('=').append(sysProps.getProperty(element));
            }
        }
        String hadoopOpts = sb.toString();
        // Inherit the environment variables
        Map<String, String> variables = new HashMap<String, String>(System.getenv());
        // The user can specify the hadoop memory.
        // NOTE: this handling used to be guarded by a check that HADOOPJT is "local"; that
        // guard is commented out in the original code, so it applies unconditionally.
        int hadoopMem = conf.getIntVar(HiveConf.ConfVars.HIVEHADOOPMAXMEM);
        if (hadoopMem == 0) {
            // remove env var that would default child jvm to use parent's memory
            // as default. child jvm would use default memory for a hadoop client
            variables.remove(HADOOP_MEM_KEY);
        } else {
            // user specified the memory for local mode hadoop run
            console.printInfo(" set heap size\t" + hadoopMem + "MB");
            variables.put(HADOOP_MEM_KEY, String.valueOf(hadoopMem));
        }
        // Set HADOOP_USER_NAME env variable for child process, so that
        // it also runs with hadoop permissions for the user the job is running as
        // This will be used by hadoop only in unsecure(/non kerberos) mode
        String endUserName = Utils.getUGI().getShortUserName();
        LOG.debug("setting HADOOP_USER_NAME\t" + endUserName);
        variables.put("HADOOP_USER_NAME", endUserName);
        if (variables.containsKey(HADOOP_OPTS_KEY)) {
            variables.put(HADOOP_OPTS_KEY, variables.get(HADOOP_OPTS_KEY) + hadoopOpts);
        } else {
            variables.put(HADOOP_OPTS_KEY, hadoopOpts);
        }
        // Hiveserver2 using "-hiveconf hive.hadoop.classpath=%HIVE_LIB%". This is to combine path(s).
        String hiveHadoopClasspath = HiveConf.getVar(conf, HiveConf.ConfVars.HIVE_HADOOP_CLASSPATH);
        if (hiveHadoopClasspath != null) {
            if (variables.containsKey("HADOOP_CLASSPATH")) {
                // Join with the platform path separator: ";" is only valid on Windows and
                // would fuse the two entries into one bogus path elsewhere.
                variables.put("HADOOP_CLASSPATH", variables.get("HADOOP_CLASSPATH") + File.pathSeparator + hiveHadoopClasspath);
            } else {
                variables.put("HADOOP_CLASSPATH", hiveHadoopClasspath);
            }
        }
        if (variables.containsKey(MapRedTask.HIVE_DEBUG_RECURSIVE)) {
            MapRedTask.configureDebugVariablesForChildJVM(variables);
        }
        if (UserGroupInformation.isSecurityEnabled() && UserGroupInformation.isLoginKeytabBased()) {
            // If kerberos security is enabled, and HS2 doAs is enabled,
            // then additional params need to be set so that the command is run as
            // intended user
            secureDoAs = new SecureCmdDoAs(conf);
            secureDoAs.addEnv(variables);
        }
        // If HIVE_LOCAL_TASK_CHILD_OPTS_KEY is present, its value replaces
        // HADOOP_CLIENT_OPTS, and any occurrence of the previous client opts inside
        // HADOOP_OPTS is replaced too, so that the local task JVM can
        // have different settings from those of HiveServer2.
        if (variables.containsKey(HIVE_LOCAL_TASK_CHILD_OPTS_KEY)) {
            String childOpts = variables.get(HIVE_LOCAL_TASK_CHILD_OPTS_KEY);
            if (childOpts == null) {
                childOpts = "";
            }
            // put() returns the previous HADOOP_CLIENT_OPTS value (null if there was none).
            String clientOpts = variables.put(HADOOP_CLIENT_OPTS, childOpts);
            String tmp = variables.get(HADOOP_OPTS_KEY);
            if (tmp != null && !StringUtils.isBlank(clientOpts)) {
                tmp = tmp.replace(clientOpts, childOpts);
                variables.put(HADOOP_OPTS_KEY, tmp);
            }
        }
        // Flatten the map into the NAME=value array expected by Runtime.exec.
        String[] env = new String[variables.size()];
        int pos = 0;
        for (Map.Entry<String, String> entry : variables.entrySet()) {
            String name = entry.getKey();
            String value = entry.getValue();
            env[pos++] = name + "=" + value;
            LOG.debug("Setting env: " + name + "=" + LogUtils.maskIfPassword(name, value));
        }
        LOG.info("Executing: " + cmdLine);
        // Run ExecDriver in another JVM
        executor = Runtime.getRuntime().exec(cmdLine, env, new File(workDir));
        final LogRedirector.LogSourceCallback callback = () -> executor.isAlive();
        LogRedirector.redirect(Thread.currentThread().getName() + "-LocalTask-" + getName() + "-stdout", new LogRedirector(executor.getInputStream(), LOG, callback));
        LogRedirector.redirect(Thread.currentThread().getName() + "-LocalTask-" + getName() + "-stderr", new LogRedirector(executor.getErrorStream(), LOG, callback));
        // NOTE(review): the StreamPrinters below read the same child streams as the
        // LogRedirectors above - confirm that having two readers per stream is intended.
        CachingPrintStream errPrintStream = new CachingPrintStream(System.err);
        StreamPrinter outPrinter = new StreamPrinter(executor.getInputStream(), null, System.out);
        StreamPrinter errPrinter = new StreamPrinter(executor.getErrorStream(), null, errPrintStream);
        outPrinter.start();
        errPrinter.start();
        int exitVal = jobExecHelper.progressLocal(executor, getId());
        // wait for stream threads to finish
        outPrinter.join();
        errPrinter.join();
        if (exitVal != 0) {
            LOG.error("Execution failed with exit status: " + exitVal);
            if (SessionState.get() != null) {
                SessionState.get().addLocalMapRedErrors(getId(), errPrintStream.getOutput());
            }
        } else {
            LOG.info("Execution completed successfully");
        }
        return exitVal;
    } catch (Exception e) {
        LOG.error("Exception: ", e);
        return 1;
    } finally {
        if (secureDoAs != null) {
            secureDoAs.close();
        }
    }
}
Also used : Context(org.apache.hadoop.hive.ql.Context) CompilationOpContext(org.apache.hadoop.hive.ql.CompilationOpContext) BucketMapJoinContext(org.apache.hadoop.hive.ql.plan.BucketMapJoinContext) DriverContext(org.apache.hadoop.hive.ql.DriverContext) Path(org.apache.hadoop.fs.Path) LogRedirector(org.apache.hadoop.hive.common.log.LogRedirector) HashMap(java.util.HashMap) SecureCmdDoAs(org.apache.hadoop.hive.ql.exec.SecureCmdDoAs) OutputStream(java.io.OutputStream) IOException(java.io.IOException) Properties(java.util.Properties) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) IOException(java.io.IOException) CachingPrintStream(org.apache.hadoop.hive.common.io.CachingPrintStream) StreamPrinter(org.apache.hive.common.util.StreamPrinter) MapredLocalWork(org.apache.hadoop.hive.ql.plan.MapredLocalWork) File(java.io.File) Map(java.util.Map) HashMap(java.util.HashMap)

Example 4 with LogRedirector

Use of org.apache.hadoop.hive.common.log.LogRedirector in the Apache Hive project.

From the class SparkSubmitSparkClient, method launchDriver.

/**
 * Starts the spark-submit child process and a daemon thread that monitors it.
 *
 * <p>The command line is the space-joined {@code argv} run through {@code sh -c}. Before the
 * process is started, {@code HIVE_HOME} and {@code HIVE_CONF_DIR} are removed from its
 * environment, the credential provider password is added when one is available, and
 * {@code SPARK_TESTING} is set when {@code isTesting} is non-null. The child's stdout and stderr
 * are each handed to a {@link LogRedirector} together with a synchronized list; those lists are
 * what the monitor inspects once the process exits (presumably the redirectors append every
 * line they read to them; confirm against LogRedirector).
 *
 * <p>When the child exits with code 0, both lists are scanned for a YARN application id, which
 * is set on {@code rpcServer}. On a non-zero exit code, the stderr lines accepted by
 * {@code containsErrorKeyword} are collected and the client is cancelled with a
 * {@link RuntimeException} describing the failure.
 *
 * @param isTesting value exported to the child as {@code SPARK_TESTING}; not exported if null
 * @param rpcServer server that receives the application id or the cancellation
 * @param clientId id of the client to cancel when the child fails or monitoring is interrupted
 * @return a future that completes (with {@code null}) when the monitor thread finishes
 * @throws IOException if the child process cannot be started
 */
@Override
protected Future<Void> launchDriver(String isTesting, RpcServer rpcServer, String clientId) throws IOException {
    // NOTE(review): argv is joined and interpreted by a shell, so any quoting must already be
    // present in argv. Verify that no untrusted value can reach it.
    String cmd = Joiner.on(" ").join(argv);
    LOG.info("Running client driver with argv: {}", cmd);
    ProcessBuilder pb = new ProcessBuilder("sh", "-c", cmd);
    // Prevent hive configurations from being visible in Spark.
    pb.environment().remove("HIVE_HOME");
    pb.environment().remove("HIVE_CONF_DIR");
    // Add credential provider password to the child process's environment
    // In case of Spark the credential provider location is provided in the jobConf when the job is submitted
    String password = getSparkJobCredentialProviderPassword();
    if (password != null) {
        pb.environment().put(Constants.HADOOP_CREDENTIAL_PASSWORD_ENVVAR, password);
    }
    if (isTesting != null) {
        pb.environment().put("SPARK_TESTING", isTesting);
    }
    final Process child = pb.start();
    String threadName = Thread.currentThread().getName();
    // Synchronized lists: they are shared between the redirector threads and the monitor thread.
    final List<String> childErrorLog = Collections.synchronizedList(new ArrayList<String>());
    final List<String> childOutLog = Collections.synchronizedList(new ArrayList<String>());
    final LogRedirector.LogSourceCallback callback = () -> isAlive;
    LogRedirector.redirect("spark-submit-stdout-redir-" + threadName, new LogRedirector(child.getInputStream(), LOG, childOutLog, callback));
    LogRedirector.redirect("spark-submit-stderr-redir-" + threadName, new LogRedirector(child.getErrorStream(), LOG, childErrorLog, callback));
    final Callable<Void> runnable = () -> {
        try {
            int exitCode = child.waitFor();
            if (exitCode == 0) {
                // Iterating a synchronizedList requires holding its lock manually.
                synchronized (childOutLog) {
                    for (String line : childOutLog) {
                        Matcher m = YARN_APPLICATION_ID_REGEX.matcher(line);
                        if (m.find()) {
                            String applicationId = m.group(1);
                            LOG.info("Found application id {}", applicationId);
                            rpcServer.setApplicationId(applicationId);
                        }
                    }
                }
                synchronized (childErrorLog) {
                    for (String line : childErrorLog) {
                        Matcher m = YARN_APPLICATION_ID_REGEX.matcher(line);
                        if (m.find()) {
                            String applicationId = m.group(1);
                            LOG.info("Found application id {}", applicationId);
                            rpcServer.setApplicationId(applicationId);
                        }
                    }
                }
            } else {
                // Build a quoted, comma-separated summary of the stderr lines that look like errors.
                List<String> errorMessages = new ArrayList<>();
                synchronized (childErrorLog) {
                    for (String line : childErrorLog) {
                        if (containsErrorKeyword(line)) {
                            errorMessages.add("\"" + line + "\"");
                        }
                    }
                }
                String errStr = errorMessages.isEmpty() ? "?" : Joiner.on(',').join(errorMessages);
                rpcServer.cancelClient(clientId, new RuntimeException("spark-submit process failed " + "with exit code " + exitCode + " and error " + errStr));
            }
        } catch (InterruptedException ie) {
            LOG.warn("Thread waiting on the child process (spark-submit) is interrupted, killing the child process.");
            rpcServer.cancelClient(clientId, "Thread waiting on the child process (spark-submit) is interrupted");
            child.destroy();
            // Restore the interrupt status (it is cleared when InterruptedException is thrown) so
            // that anything running later on this thread can still observe the interruption.
            // Done last so the cleanup calls above do not run with a pending interrupt.
            Thread.currentThread().interrupt();
        } catch (Exception e) {
            String errMsg = "Exception while waiting for child process (spark-submit)";
            LOG.warn(errMsg, e);
            rpcServer.cancelClient(clientId, errMsg);
        }
        return null;
    };
    FutureTask<Void> futureTask = new FutureTask<>(runnable);
    // Daemon thread: the monitor must not keep the JVM alive on its own.
    Thread driverThread = new Thread(futureTask);
    driverThread.setDaemon(true);
    driverThread.setName("SparkSubmitMonitor");
    driverThread.start();
    return futureTask;
}
Also used : LogRedirector(org.apache.hadoop.hive.common.log.LogRedirector) Matcher(java.util.regex.Matcher) ArrayList(java.util.ArrayList) IOException(java.io.IOException) FutureTask(java.util.concurrent.FutureTask)

Aggregations

IOException (java.io.IOException): 4, LogRedirector (org.apache.hadoop.hive.common.log.LogRedirector): 4, File (java.io.File): 3, Map (java.util.Map): 3, Properties (java.util.Properties): 3, OutputStream (java.io.OutputStream): 2, HashMap (java.util.HashMap): 2, Path (org.apache.hadoop.fs.Path): 2, CachingPrintStream (org.apache.hadoop.hive.common.io.CachingPrintStream): 2, SecureCmdDoAs (org.apache.hadoop.hive.ql.exec.SecureCmdDoAs): 2, HiveException (org.apache.hadoop.hive.ql.metadata.HiveException): 2, MapredLocalWork (org.apache.hadoop.hive.ql.plan.MapredLocalWork): 2, StreamPrinter (org.apache.hive.common.util.StreamPrinter): 2, ByteArrayInputStream (java.io.ByteArrayInputStream): 1, FileOutputStream (java.io.FileOutputStream): 1, OutputStreamWriter (java.io.OutputStreamWriter): 1, Writer (java.io.Writer): 1, URL (java.net.URL): 1, ArrayList (java.util.ArrayList): 1, Iterator (java.util.Iterator): 1