Search in sources :

Example 1 with LlapOptions

use of org.apache.hadoop.hive.llap.cli.LlapOptionsProcessor.LlapOptions in project hive by apache.

the class LlapServiceDriver method run.

private int run(String[] args) throws Exception {
    LlapOptionsProcessor optionsProcessor = new LlapOptionsProcessor();
    final LlapOptions options = optionsProcessor.processOptions(args);
    final Properties propsDirectOptions = new Properties();
    if (options == null) {
        // help
        return 1;
    }
    // Working directory.
    Path tmpDir = new Path(options.getDirectory());
    if (conf == null) {
        throw new Exception("Cannot load any configuration to run command");
    }
    final long t0 = System.nanoTime();
    final FileSystem fs = FileSystem.get(conf);
    final FileSystem lfs = FileSystem.getLocal(conf).getRawFileSystem();
    int threadCount = Math.max(1, Runtime.getRuntime().availableProcessors() / 2);
    final ExecutorService executor = Executors.newFixedThreadPool(threadCount, new ThreadFactoryBuilder().setNameFormat("llap-pkg-%d").build());
    final CompletionService<Void> asyncRunner = new ExecutorCompletionService<Void>(executor);
    int rc = 0;
    try {
        // needed so that the file is actually loaded into configuration.
        for (String f : NEEDED_CONFIGS) {
            conf.addResource(f);
            if (conf.getResource(f) == null) {
                throw new Exception("Unable to find required config file: " + f);
            }
        }
        for (String f : OPTIONAL_CONFIGS) {
            conf.addResource(f);
        }
        conf.reloadConfiguration();
        populateConfWithLlapProperties(conf, options.getConfig());
        if (options.getName() != null) {
            // update service registry configs - caveat: this has nothing to do with the actual settings
            // as read by the AM
            // if needed, use --hiveconf llap.daemon.service.hosts=@llap0 to dynamically switch between
            // instances
            conf.set(ConfVars.LLAP_DAEMON_SERVICE_HOSTS.varname, "@" + options.getName());
            propsDirectOptions.setProperty(ConfVars.LLAP_DAEMON_SERVICE_HOSTS.varname, "@" + options.getName());
        }
        if (options.getLogger() != null) {
            HiveConf.setVar(conf, ConfVars.LLAP_DAEMON_LOGGER, options.getLogger());
            propsDirectOptions.setProperty(ConfVars.LLAP_DAEMON_LOGGER.varname, options.getLogger());
        }
        boolean isDirect = HiveConf.getBoolVar(conf, HiveConf.ConfVars.LLAP_ALLOCATOR_DIRECT);
        if (options.getSize() != -1) {
            if (options.getCache() != -1) {
                if (HiveConf.getBoolVar(conf, HiveConf.ConfVars.LLAP_ALLOCATOR_MAPPED) == false) {
                    // direct heap allocations need to be safer
                    Preconditions.checkArgument(options.getCache() < options.getSize(), "Cache size (" + LlapUtil.humanReadableByteCount(options.getCache()) + ") has to be smaller" + " than the container sizing (" + LlapUtil.humanReadableByteCount(options.getSize()) + ")");
                } else if (options.getCache() < options.getSize()) {
                    LOG.warn("Note that this might need YARN physical memory monitoring to be turned off " + "(yarn.nodemanager.pmem-check-enabled=false)");
                }
            }
            if (options.getXmx() != -1) {
                Preconditions.checkArgument(options.getXmx() < options.getSize(), "Working memory (Xmx=" + LlapUtil.humanReadableByteCount(options.getXmx()) + ") has to be" + " smaller than the container sizing (" + LlapUtil.humanReadableByteCount(options.getSize()) + ")");
            }
            if (isDirect && !HiveConf.getBoolVar(conf, HiveConf.ConfVars.LLAP_ALLOCATOR_MAPPED)) {
                // direct and not memory mapped
                Preconditions.checkArgument(options.getXmx() + options.getCache() <= options.getSize(), "Working memory (Xmx=" + LlapUtil.humanReadableByteCount(options.getXmx()) + ") + cache size (" + LlapUtil.humanReadableByteCount(options.getCache()) + ") has to be smaller than the container sizing (" + LlapUtil.humanReadableByteCount(options.getSize()) + ")");
            }
        }
        if (options.getExecutors() != -1) {
            conf.setLong(ConfVars.LLAP_DAEMON_NUM_EXECUTORS.varname, options.getExecutors());
            propsDirectOptions.setProperty(ConfVars.LLAP_DAEMON_NUM_EXECUTORS.varname, String.valueOf(options.getExecutors()));
        // TODO: vcpu settings - possibly when DRFA works right
        }
        if (options.getIoThreads() != -1) {
            conf.setLong(ConfVars.LLAP_IO_THREADPOOL_SIZE.varname, options.getIoThreads());
            propsDirectOptions.setProperty(ConfVars.LLAP_IO_THREADPOOL_SIZE.varname, String.valueOf(options.getIoThreads()));
        }
        long cache = -1, xmx = -1;
        if (options.getCache() != -1) {
            cache = options.getCache();
            conf.set(HiveConf.ConfVars.LLAP_IO_MEMORY_MAX_SIZE.varname, Long.toString(cache));
            propsDirectOptions.setProperty(HiveConf.ConfVars.LLAP_IO_MEMORY_MAX_SIZE.varname, Long.toString(cache));
        }
        if (options.getXmx() != -1) {
            // Needs more explanation here
            // Xmx is not the max heap value in JDK8. You need to subtract 50% of the survivor fraction
            // from this, to get actual usable memory before it goes into GC
            xmx = options.getXmx();
            long xmxMb = (xmx / (1024L * 1024L));
            conf.setLong(ConfVars.LLAP_DAEMON_MEMORY_PER_INSTANCE_MB.varname, xmxMb);
            propsDirectOptions.setProperty(ConfVars.LLAP_DAEMON_MEMORY_PER_INSTANCE_MB.varname, String.valueOf(xmxMb));
        }
        long size = options.getSize();
        if (size == -1) {
            long heapSize = xmx;
            if (!isDirect) {
                heapSize += cache;
            }
            size = Math.min((long) (heapSize * 1.2), heapSize + 1024L * 1024 * 1024);
            if (isDirect) {
                size += cache;
            }
        }
        long containerSize = size / (1024 * 1024);
        final long minAlloc = conf.getInt(YarnConfiguration.RM_SCHEDULER_MINIMUM_ALLOCATION_MB, -1);
        Preconditions.checkArgument(containerSize >= minAlloc, "Container size (" + LlapUtil.humanReadableByteCount(options.getSize()) + ") should be greater" + " than minimum allocation(" + LlapUtil.humanReadableByteCount(minAlloc * 1024L * 1024L) + ")");
        conf.setLong(ConfVars.LLAP_DAEMON_YARN_CONTAINER_MB.varname, containerSize);
        propsDirectOptions.setProperty(ConfVars.LLAP_DAEMON_YARN_CONTAINER_MB.varname, String.valueOf(containerSize));
        LOG.info("Memory settings: container memory: {} executor memory: {} cache memory: {}", LlapUtil.humanReadableByteCount(options.getSize()), LlapUtil.humanReadableByteCount(options.getXmx()), LlapUtil.humanReadableByteCount(options.getCache()));
        if (options.getLlapQueueName() != null && !options.getLlapQueueName().isEmpty()) {
            conf.set(ConfVars.LLAP_DAEMON_QUEUE_NAME.varname, options.getLlapQueueName());
            propsDirectOptions.setProperty(ConfVars.LLAP_DAEMON_QUEUE_NAME.varname, options.getLlapQueueName());
        }
        final URL logger = conf.getResource(LlapConstants.LOG4j2_PROPERTIES_FILE);
        if (null == logger) {
            throw new Exception("Unable to find required config file: llap-daemon-log4j2.properties");
        }
        Path home = new Path(System.getenv("HIVE_HOME"));
        Path scriptParent = new Path(new Path(home, "scripts"), "llap");
        Path scripts = new Path(scriptParent, "bin");
        if (!lfs.exists(home)) {
            throw new Exception("Unable to find HIVE_HOME:" + home);
        } else if (!lfs.exists(scripts)) {
            LOG.warn("Unable to find llap scripts:" + scripts);
        }
        final Path libDir = new Path(tmpDir, "lib");
        final Path tezDir = new Path(libDir, "tez");
        final Path udfDir = new Path(libDir, "udfs");
        final Path confPath = new Path(tmpDir, "conf");
        lfs.mkdirs(confPath);
        NamedCallable<Void> downloadTez = new NamedCallable<Void>("downloadTez") {

            @Override
            public Void call() throws Exception {
                synchronized (fs) {
                    String tezLibs = conf.get(TezConfiguration.TEZ_LIB_URIS);
                    if (tezLibs == null) {
                        LOG.warn("Missing tez.lib.uris in tez-site.xml");
                    }
                    if (LOG.isDebugEnabled()) {
                        LOG.debug("Copying tez libs from " + tezLibs);
                    }
                    lfs.mkdirs(tezDir);
                    fs.copyToLocalFile(new Path(tezLibs), new Path(libDir, "tez.tar.gz"));
                    CompressionUtils.unTar(new Path(libDir, "tez.tar.gz").toString(), tezDir.toString(), true);
                    lfs.delete(new Path(libDir, "tez.tar.gz"), false);
                }
                return null;
            }
        };
        NamedCallable<Void> copyLocalJars = new NamedCallable<Void>("copyLocalJars") {

            @Override
            public Void call() throws Exception {
                Class<?>[] dependencies = new Class<?>[] { // llap-common
                LlapDaemonProtocolProtos.class, // llap-tez
                LlapTezUtils.class, // llap-server
                LlapInputFormat.class, // hive-exec
                HiveInputFormat.class, // hive-common (https deps)
                SslSocketConnector.class, // ZK registry
                RegistryUtils.ServiceRecordMarshal.class, // disruptor
                com.lmax.disruptor.RingBuffer.class, // log4j-api
                org.apache.logging.log4j.Logger.class, // log4j-core
                org.apache.logging.log4j.core.Appender.class, // log4j-slf4j
                org.apache.logging.slf4j.Log4jLogger.class, // log4j-1.2-API needed for NDC
                org.apache.log4j.NDC.class };
                for (Class<?> c : dependencies) {
                    Path jarPath = new Path(Utilities.jarFinderGetJar(c));
                    lfs.copyFromLocalFile(jarPath, libDir);
                    if (LOG.isDebugEnabled()) {
                        LOG.debug("Copying " + jarPath + " to " + libDir);
                    }
                }
                return null;
            }
        };
        // copy default aux classes (json/hbase)
        NamedCallable<Void> copyAuxJars = new NamedCallable<Void>("copyAuxJars") {

            @Override
            public Void call() throws Exception {
                for (String className : DEFAULT_AUX_CLASSES) {
                    localizeJarForClass(lfs, libDir, className, false);
                }
                Collection<String> codecs = conf.getStringCollection("io.compression.codecs");
                if (codecs != null) {
                    for (String codecClassName : codecs) {
                        localizeJarForClass(lfs, libDir, codecClassName, false);
                    }
                }
                if (options.getIsHBase()) {
                    try {
                        localizeJarForClass(lfs, libDir, HBASE_SERDE_CLASS, true);
                        // HBase API is convoluted.
                        Job fakeJob = new Job(new JobConf());
                        TableMapReduceUtil.addDependencyJars(fakeJob);
                        Collection<String> hbaseJars = fakeJob.getConfiguration().getStringCollection("tmpjars");
                        for (String jarPath : hbaseJars) {
                            if (!jarPath.isEmpty()) {
                                lfs.copyFromLocalFile(new Path(jarPath), libDir);
                            }
                        }
                    } catch (Throwable t) {
                        String err = "Failed to add HBase jars. Use --auxhbase=false to avoid localizing them";
                        LOG.error(err);
                        System.err.println(err);
                        throw new RuntimeException(t);
                    }
                }
                HashSet<String> auxJars = new HashSet<>();
                // There are many ways to have AUX jars in Hive... sigh
                if (options.getIsHiveAux()) {
                    // Note: we don't add ADDED jars, RELOADABLE jars, etc. That is by design; there are too many ways
                    // to add jars in Hive, some of which are session/etc. specific. Env + conf + arg should be enough.
                    addAuxJarsToSet(auxJars, conf.getAuxJars());
                    addAuxJarsToSet(auxJars, System.getenv("HIVE_AUX_JARS_PATH"));
                    LOG.info("Adding the following aux jars from the environment and configs: " + auxJars);
                }
                addAuxJarsToSet(auxJars, options.getAuxJars());
                for (String jarPath : auxJars) {
                    lfs.copyFromLocalFile(new Path(jarPath), libDir);
                }
                return null;
            }

            private void addAuxJarsToSet(HashSet<String> auxJarSet, String auxJars) {
                if (auxJars != null && !auxJars.isEmpty()) {
                    // TODO: transitive dependencies warning?
                    String[] jarPaths = auxJars.split(",");
                    for (String jarPath : jarPaths) {
                        if (!jarPath.isEmpty()) {
                            auxJarSet.add(jarPath);
                        }
                    }
                }
            }
        };
        NamedCallable<Void> copyUdfJars = new NamedCallable<Void>("copyUdfJars") {

            @Override
            public Void call() throws Exception {
                // UDFs
                final Set<String> allowedUdfs;
                if (HiveConf.getBoolVar(conf, HiveConf.ConfVars.LLAP_ALLOW_PERMANENT_FNS)) {
                    synchronized (fs) {
                        allowedUdfs = downloadPermanentFunctions(conf, udfDir);
                    }
                } else {
                    allowedUdfs = Collections.emptySet();
                }
                PrintWriter udfStream = new PrintWriter(lfs.create(new Path(confPath, StaticPermanentFunctionChecker.PERMANENT_FUNCTIONS_LIST)));
                for (String udfClass : allowedUdfs) {
                    udfStream.println(udfClass);
                }
                udfStream.close();
                return null;
            }
        };
        String java_home;
        if (options.getJavaPath() == null || options.getJavaPath().isEmpty()) {
            java_home = System.getenv("JAVA_HOME");
            String jre_home = System.getProperty("java.home");
            if (java_home == null) {
                java_home = jre_home;
            } else if (!java_home.equals(jre_home)) {
                LOG.warn("Java versions might not match : JAVA_HOME=[{}],process jre=[{}]", java_home, jre_home);
            }
        } else {
            java_home = options.getJavaPath();
        }
        if (java_home == null || java_home.isEmpty()) {
            throw new RuntimeException("Could not determine JAVA_HOME from command line parameters, environment or system properties");
        }
        LOG.info("Using [{}] for JAVA_HOME", java_home);
        NamedCallable<Void> copyConfigs = new NamedCallable<Void>("copyConfigs") {

            @Override
            public Void call() throws Exception {
                // Copy over the mandatory configs for the package.
                for (String f : NEEDED_CONFIGS) {
                    copyConfig(lfs, confPath, f);
                }
                for (String f : OPTIONAL_CONFIGS) {
                    try {
                        copyConfig(lfs, confPath, f);
                    } catch (Throwable t) {
                        LOG.info("Error getting an optional config " + f + "; ignoring: " + t.getMessage());
                    }
                }
                createLlapDaemonConfig(lfs, confPath, conf, propsDirectOptions, options.getConfig());
                setUpLogAndMetricConfigs(lfs, logger, confPath);
                return null;
            }
        };
        @SuppressWarnings("unchecked") final NamedCallable<Void>[] asyncWork = new NamedCallable[] { downloadTez, copyUdfJars, copyLocalJars, copyAuxJars, copyConfigs };
        @SuppressWarnings("unchecked") final Future<Void>[] asyncResults = new Future[asyncWork.length];
        for (int i = 0; i < asyncWork.length; i++) {
            asyncResults[i] = asyncRunner.submit(asyncWork[i]);
        }
        // TODO: need to move from Python to Java for the rest of the script.
        JSONObject configs = createConfigJson(containerSize, cache, xmx, java_home);
        writeConfigJson(tmpDir, lfs, configs);
        if (LOG.isDebugEnabled()) {
            LOG.debug("Config generation took " + (System.nanoTime() - t0) + " ns");
        }
        for (int i = 0; i < asyncWork.length; i++) {
            final long t1 = System.nanoTime();
            asyncResults[i].get();
            final long t2 = System.nanoTime();
            if (LOG.isDebugEnabled()) {
                LOG.debug(asyncWork[i].getName() + " waited for " + (t2 - t1) + " ns");
            }
        }
        if (options.isStarting()) {
            String version = System.getenv("HIVE_VERSION");
            if (version == null || version.isEmpty()) {
                version = DateTime.now().toString("ddMMMyyyy");
            }
            String outputDir = options.getOutput();
            Path packageDir = null;
            if (outputDir == null) {
                outputDir = OUTPUT_DIR_PREFIX + version;
                packageDir = new Path(Paths.get(".").toAbsolutePath().toString(), OUTPUT_DIR_PREFIX + version);
            } else {
                packageDir = new Path(outputDir);
            }
            rc = runPackagePy(args, tmpDir, scriptParent, version, outputDir);
            if (rc == 0) {
                LlapSliderUtils.startCluster(conf, options.getName(), "llap-" + version + ".zip", packageDir, HiveConf.getVar(conf, ConfVars.LLAP_DAEMON_QUEUE_NAME));
            }
        } else {
            rc = 0;
        }
    } finally {
        executor.shutdown();
        lfs.close();
        fs.close();
    }
    if (rc == 0) {
        if (LOG.isDebugEnabled()) {
            LOG.debug("Exiting successfully");
        }
    } else {
        LOG.info("Exiting with rc = " + rc);
    }
    return rc;
}
Also used : FileSystem(org.apache.hadoop.fs.FileSystem) LlapDaemonProtocolProtos(org.apache.hadoop.hive.llap.daemon.rpc.LlapDaemonProtocolProtos) Job(org.apache.hadoop.mapreduce.Job) JobConf(org.apache.hadoop.mapred.JobConf) HashSet(java.util.HashSet) JSONObject(org.json.JSONObject) LlapTezUtils(org.apache.hadoop.hive.llap.tezplugins.LlapTezUtils) SslSocketConnector(org.eclipse.jetty.server.ssl.SslSocketConnector) ExecutorCompletionService(java.util.concurrent.ExecutorCompletionService) Properties(java.util.Properties) Logger(org.slf4j.Logger) URL(java.net.URL) HiveInputFormat(org.apache.hadoop.hive.ql.io.HiveInputFormat) ThreadFactoryBuilder(com.google.common.util.concurrent.ThreadFactoryBuilder) LlapInputFormat(org.apache.hadoop.hive.llap.io.api.impl.LlapInputFormat) LlapOptions(org.apache.hadoop.hive.llap.cli.LlapOptionsProcessor.LlapOptions) PrintWriter(java.io.PrintWriter) Path(org.apache.hadoop.fs.Path) URISyntaxException(java.net.URISyntaxException) JSONException(org.json.JSONException) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) IOException(java.io.IOException) ExecutorService(java.util.concurrent.ExecutorService) Future(java.util.concurrent.Future)

Aggregations

ThreadFactoryBuilder (com.google.common.util.concurrent.ThreadFactoryBuilder)1 IOException (java.io.IOException)1 PrintWriter (java.io.PrintWriter)1 URISyntaxException (java.net.URISyntaxException)1 URL (java.net.URL)1 HashSet (java.util.HashSet)1 Properties (java.util.Properties)1 ExecutorCompletionService (java.util.concurrent.ExecutorCompletionService)1 ExecutorService (java.util.concurrent.ExecutorService)1 Future (java.util.concurrent.Future)1 FileSystem (org.apache.hadoop.fs.FileSystem)1 Path (org.apache.hadoop.fs.Path)1 LlapOptions (org.apache.hadoop.hive.llap.cli.LlapOptionsProcessor.LlapOptions)1 LlapDaemonProtocolProtos (org.apache.hadoop.hive.llap.daemon.rpc.LlapDaemonProtocolProtos)1 LlapInputFormat (org.apache.hadoop.hive.llap.io.api.impl.LlapInputFormat)1 LlapTezUtils (org.apache.hadoop.hive.llap.tezplugins.LlapTezUtils)1 HiveInputFormat (org.apache.hadoop.hive.ql.io.HiveInputFormat)1 HiveException (org.apache.hadoop.hive.ql.metadata.HiveException)1 JobConf (org.apache.hadoop.mapred.JobConf)1 Job (org.apache.hadoop.mapreduce.Job)1