
Example 21 with ThreadFactoryBuilder

Use of com.google.common.util.concurrent.ThreadFactoryBuilder in project hive by apache.

From the class StatsTask, method aggregateStats:

private int aggregateStats(Hive db) {
    StatsAggregator statsAggregator = null;
    int ret = 0;
    StatsCollectionContext scc = null;
    EnvironmentContext environmentContext = null;
    try {
        // Stats setup:
        final Warehouse wh = new Warehouse(conf);
        if (!getWork().getNoStatsAggregator() && !getWork().isNoScanAnalyzeCommand()) {
            try {
                scc = getContext();
                statsAggregator = createStatsAggregator(scc, conf);
            } catch (HiveException e) {
                if (HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_STATS_RELIABLE)) {
                    throw e;
                }
                console.printError(ErrorMsg.STATS_SKIPPING_BY_ERROR.getErrorCodedMsg(e.toString()));
            }
        }
        List<Partition> partitions = getPartitionsList(db);
        boolean atomic = HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_STATS_ATOMIC);
        String tableFullName = table.getDbName() + "." + table.getTableName();
        if (partitions == null) {
            org.apache.hadoop.hive.metastore.api.Table tTable = table.getTTable();
            Map<String, String> parameters = tTable.getParameters();
            // acidTable will not have accurate stats unless it is set through analyze command.
            if (work.getTableSpecs() == null && AcidUtils.isAcidTable(table)) {
                StatsSetupConst.setBasicStatsState(parameters, StatsSetupConst.FALSE);
            } else if (work.getTableSpecs() != null || (work.getLoadTableDesc() != null && work.getLoadTableDesc().getReplace()) || (work.getLoadFileDesc() != null && !work.getLoadFileDesc().getDestinationCreateTable().isEmpty())) {
                StatsSetupConst.setBasicStatsState(parameters, StatsSetupConst.TRUE);
            }
            // non-partitioned tables:
            if (!existStats(parameters) && atomic) {
                return 0;
            }
            // For eg. if a file is being loaded, the old number of rows are not valid
            if (work.isClearAggregatorStats()) {
                // we choose to keep the invalid stats and only change the setting.
                StatsSetupConst.setBasicStatsState(parameters, StatsSetupConst.FALSE);
            }
            updateQuickStats(wh, parameters, tTable.getSd());
            if (StatsSetupConst.areBasicStatsUptoDate(parameters)) {
                if (statsAggregator != null) {
                    String prefix = getAggregationPrefix(table, null);
                    updateStats(statsAggregator, parameters, prefix, atomic);
                }
                // write table stats to metastore
                if (!getWork().getNoStatsAggregator()) {
                    environmentContext = new EnvironmentContext();
                    environmentContext.putToProperties(StatsSetupConst.STATS_GENERATED, StatsSetupConst.TASK);
                }
            }
            getHive().alterTable(tableFullName, new Table(tTable), environmentContext);
            if (conf.getBoolVar(ConfVars.TEZ_EXEC_SUMMARY)) {
                console.printInfo("Table " + tableFullName + " stats: [" + toString(parameters) + ']');
            }
            LOG.info("Table " + tableFullName + " stats: [" + toString(parameters) + ']');
        } else {
            // Partitioned table:
            // Need to get the old stats of the partition
            // and update the table stats based on the old and new stats.
            List<Partition> updates = new ArrayList<Partition>();
            //Get the file status up-front for all partitions. Beneficial in cases of blob storage systems
            final Map<String, FileStatus[]> fileStatusMap = new ConcurrentHashMap<String, FileStatus[]>();
            int poolSize = conf.getInt(ConfVars.HIVE_MOVE_FILES_THREAD_COUNT.varname, 1);
            // In case thread count is set to 0, use single thread.
            poolSize = Math.max(poolSize, 1);
            final ExecutorService pool = Executors.newFixedThreadPool(poolSize, new ThreadFactoryBuilder().setDaemon(true).setNameFormat("stats-updater-thread-%d").build());
            final List<Future<Void>> futures = Lists.newLinkedList();
            LOG.debug("Getting file stats of all partitions. threadpool size:" + poolSize);
            try {
                for (final Partition partn : partitions) {
                    final String partitionName = partn.getName();
                    final org.apache.hadoop.hive.metastore.api.Partition tPart = partn.getTPartition();
                    Map<String, String> parameters = tPart.getParameters();
                    if (!existStats(parameters) && atomic) {
                        continue;
                    }
                    futures.add(pool.submit(new Callable<Void>() {

                        @Override
                        public Void call() throws Exception {
                            FileStatus[] partfileStatus = wh.getFileStatusesForSD(tPart.getSd());
                            fileStatusMap.put(partitionName, partfileStatus);
                            return null;
                        }
                    }));
                }
                pool.shutdown();
                for (Future<Void> future : futures) {
                    future.get();
                }
            } catch (InterruptedException e) {
                LOG.debug("Cancelling " + futures.size() + " file stats lookup tasks");
                //cancel other futures
                for (Future future : futures) {
                    future.cancel(true);
                }
                // Fail the query if the stats are supposed to be reliable
                if (work.isStatsReliable()) {
                    ret = 1;
                }
            } finally {
                if (pool != null) {
                    pool.shutdownNow();
                }
                LOG.debug("Finished getting file stats of all partitions");
            }
            for (Partition partn : partitions) {
                //
                // get the old partition stats
                //
                org.apache.hadoop.hive.metastore.api.Partition tPart = partn.getTPartition();
                Map<String, String> parameters = tPart.getParameters();
                if (work.getTableSpecs() == null && AcidUtils.isAcidTable(table)) {
                    StatsSetupConst.setBasicStatsState(parameters, StatsSetupConst.FALSE);
                } else if (work.getTableSpecs() != null || (work.getLoadTableDesc() != null && work.getLoadTableDesc().getReplace()) || (work.getLoadFileDesc() != null && !work.getLoadFileDesc().getDestinationCreateTable().isEmpty())) {
                    StatsSetupConst.setBasicStatsState(parameters, StatsSetupConst.TRUE);
                }
                //only when the stats exist, it is added to fileStatusMap
                if (!fileStatusMap.containsKey(partn.getName())) {
                    continue;
                }
                // For eg. if a file is being loaded, the old number of rows are not valid
                if (work.isClearAggregatorStats()) {
                    // we choose to keep the invalid stats and only change the setting.
                    StatsSetupConst.setBasicStatsState(parameters, StatsSetupConst.FALSE);
                }
                updateQuickStats(parameters, fileStatusMap.get(partn.getName()));
                if (StatsSetupConst.areBasicStatsUptoDate(parameters)) {
                    if (statsAggregator != null) {
                        String prefix = getAggregationPrefix(table, partn);
                        updateStats(statsAggregator, parameters, prefix, atomic);
                    }
                    if (!getWork().getNoStatsAggregator()) {
                        environmentContext = new EnvironmentContext();
                        environmentContext.putToProperties(StatsSetupConst.STATS_GENERATED, StatsSetupConst.TASK);
                    }
                }
                updates.add(new Partition(table, tPart));
                if (conf.getBoolVar(ConfVars.TEZ_EXEC_SUMMARY)) {
                    console.printInfo("Partition " + tableFullName + partn.getSpec() + " stats: [" + toString(parameters) + ']');
                }
                LOG.info("Partition " + tableFullName + partn.getSpec() + " stats: [" + toString(parameters) + ']');
            }
            if (!updates.isEmpty()) {
                db.alterPartitions(tableFullName, updates, environmentContext);
            }
        }
    } catch (Exception e) {
        console.printInfo("[Warning] could not update stats.", "Failed with exception " + e.getMessage() + "\n" + StringUtils.stringifyException(e));
        // Fail the query if the stats are supposed to be reliable
        if (work.isStatsReliable()) {
            ret = 1;
        }
    } finally {
        if (statsAggregator != null) {
            statsAggregator.closeConnection(scc);
        }
    }
    // anything else indicates failure
    return ret;
}
Also used: Warehouse (org.apache.hadoop.hive.metastore.Warehouse), HiveException (org.apache.hadoop.hive.ql.metadata.HiveException), FileStatus (org.apache.hadoop.fs.FileStatus), ArrayList (java.util.ArrayList), Callable (java.util.concurrent.Callable), EnvironmentContext (org.apache.hadoop.hive.metastore.api.EnvironmentContext), ThreadFactoryBuilder (com.google.common.util.concurrent.ThreadFactoryBuilder), ConcurrentHashMap (java.util.concurrent.ConcurrentHashMap), StatsCollectionContext (org.apache.hadoop.hive.ql.stats.StatsCollectionContext), Partition (org.apache.hadoop.hive.ql.metadata.Partition), Table (org.apache.hadoop.hive.ql.metadata.Table), StatsAggregator (org.apache.hadoop.hive.ql.stats.StatsAggregator), MetaException (org.apache.hadoop.hive.metastore.api.MetaException), ExecutorService (java.util.concurrent.ExecutorService), Future (java.util.concurrent.Future)
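
The pattern in aggregateStats, a fixed pool of named daemon threads that fetches per-partition file statuses concurrently and then waits on the futures, can be reduced to a standalone sketch. The version below is illustrative only: the partition names and the fetchFileStatus helper are hypothetical stand-ins for Warehouse.getFileStatusesForSD, while the ThreadFactoryBuilder calls (setDaemon, setNameFormat, build) mirror the usage above.

import com.google.common.util.concurrent.ThreadFactoryBuilder;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Map;
import java.util.concurrent.Callable;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;

public class ParallelStatsSketch {

    // Hypothetical stand-in for Warehouse.getFileStatusesForSD(sd).
    static int fetchFileStatus(String partitionName) throws Exception {
        return partitionName.length();
    }

    public static void main(String[] args) throws Exception {
        List<String> partitions = Arrays.asList("a=1/b=1", "a=1/b=2", "a=2/b=1");
        // Guard against a configured thread count of 0, as the original code does.
        int poolSize = Math.max(1, 2);

        // Daemon threads named stats-updater-thread-N, matching the pool built in aggregateStats.
        ExecutorService pool = Executors.newFixedThreadPool(poolSize,
                new ThreadFactoryBuilder().setDaemon(true).setNameFormat("stats-updater-thread-%d").build());

        final Map<String, Integer> results = new ConcurrentHashMap<>();
        List<Future<Void>> futures = new ArrayList<>();
        try {
            for (final String partition : partitions) {
                Callable<Void> task = () -> {
                    results.put(partition, fetchFileStatus(partition));
                    return null;
                };
                futures.add(pool.submit(task));
            }
            for (Future<Void> future : futures) {
                future.get(); // propagate the first failure, like the original wait loop
            }
        } catch (InterruptedException e) {
            // Cancel outstanding lookups if the waiting thread is interrupted.
            for (Future<Void> future : futures) {
                future.cancel(true);
            }
            Thread.currentThread().interrupt();
        } finally {
            pool.shutdownNow();
        }
        System.out.println(results);
    }
}

Marking the threads as daemons means a stuck filesystem call cannot keep the JVM alive after the query finishes, and the name format makes the pool easy to spot in thread dumps.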

Example 22 with ThreadFactoryBuilder

Use of com.google.common.util.concurrent.ThreadFactoryBuilder in project hive by apache.

From the class HiveMetaStoreChecker, method checkPartitionDirs:

/**
   * Assume that depth is 2, i.e., partition columns are a and b
   * tblPath/a=1  => throw exception
   * tblPath/a=1/file => throw exception
   * tblPath/a=1/b=2/file => return a=1/b=2
   * tblPath/a=1/b=2/c=3 => return a=1/b=2
   * tblPath/a=1/b=2/c=3/file => return a=1/b=2
   *
   * @param basePath
   *          Start directory
   * @param allDirs
   *          This set will contain the leaf paths at the end.
   * @param maxDepth
   *          Specify how deep the search goes.
   * @throws IOException
   *           Thrown if we can't get lists from the fs.
   * @throws HiveException
   */
private void checkPartitionDirs(Path basePath, Set<Path> allDirs, int maxDepth) throws IOException, HiveException {
    // Here we just reuse the THREAD_COUNT configuration for
    // METASTORE_FS_HANDLER_THREADS_COUNT since this results in better performance
    // The number of missing partitions discovered are later added by metastore using a
    // threadpool of size METASTORE_FS_HANDLER_THREADS_COUNT. If we have different sized
    // pool here the smaller sized pool of the two becomes a bottleneck
    int poolSize = conf.getInt(ConfVars.METASTORE_FS_HANDLER_THREADS_COUNT.varname, 15);
    ExecutorService executor;
    if (poolSize <= 1) {
        LOG.debug("Using single-threaded version of MSCK-GetPaths");
        executor = MoreExecutors.sameThreadExecutor();
    } else {
        LOG.debug("Using multi-threaded version of MSCK-GetPaths with number of threads " + poolSize);
        ThreadFactory threadFactory = new ThreadFactoryBuilder().setDaemon(true).setNameFormat("MSCK-GetPaths-%d").build();
        executor = (ThreadPoolExecutor) Executors.newFixedThreadPool(poolSize, threadFactory);
    }
    checkPartitionDirs(executor, basePath, allDirs, basePath.getFileSystem(conf), maxDepth);
    executor.shutdown();
}
Also used: ThreadFactory (java.util.concurrent.ThreadFactory), ExecutorService (java.util.concurrent.ExecutorService), ThreadFactoryBuilder (com.google.common.util.concurrent.ThreadFactoryBuilder)
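
checkPartitionDirs chooses between a direct, same-thread executor and a named daemon pool based on the configured thread count. The sketch below isolates that selection logic; note that MoreExecutors.sameThreadExecutor() has since been deprecated and removed in newer Guava releases, so the sketch assumes a Guava version that provides MoreExecutors.newDirectExecutorService() instead.

import com.google.common.util.concurrent.MoreExecutors;
import com.google.common.util.concurrent.ThreadFactoryBuilder;

import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.ThreadFactory;

public class MsckExecutorSketch {

    // Choose a direct executor for poolSize <= 1, otherwise a named daemon pool,
    // mirroring the branch in checkPartitionDirs.
    static ExecutorService createExecutor(int poolSize) {
        if (poolSize <= 1) {
            // newDirectExecutorService() replaces the deprecated sameThreadExecutor()
            // (assumption: Guava 18 or later on the classpath).
            return MoreExecutors.newDirectExecutorService();
        }
        ThreadFactory threadFactory = new ThreadFactoryBuilder()
                .setDaemon(true)
                .setNameFormat("MSCK-GetPaths-%d")
                .build();
        return Executors.newFixedThreadPool(poolSize, threadFactory);
    }

    public static void main(String[] args) throws Exception {
        ExecutorService executor = createExecutor(4);
        executor.submit(() -> System.out.println(Thread.currentThread().getName())).get();
        executor.shutdown();
    }
}

The cast to ThreadPoolExecutor in the original is not strictly necessary, since the variable is declared as an ExecutorService; the sketch omits it.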

Example 23 with ThreadFactoryBuilder

Use of com.google.common.util.concurrent.ThreadFactoryBuilder in project hbase by apache.

From the class ThriftServer, method createExecutor:

private static ExecutorService createExecutor(int workerThreads, int maxCallQueueSize, ThriftMetrics metrics) {
    CallQueue callQueue;
    if (maxCallQueueSize > 0) {
        callQueue = new CallQueue(new LinkedBlockingQueue<>(maxCallQueueSize), metrics);
    } else {
        callQueue = new CallQueue(new LinkedBlockingQueue<>(), metrics);
    }
    ThreadFactoryBuilder tfb = new ThreadFactoryBuilder();
    tfb.setDaemon(true);
    tfb.setNameFormat("thrift2-worker-%d");
    ThreadPoolExecutor pool = new THBaseThreadPoolExecutor(workerThreads, workerThreads, Long.MAX_VALUE, TimeUnit.SECONDS, callQueue, tfb.build(), metrics);
    pool.prestartAllCoreThreads();
    return pool;
}
Also used: CallQueue (org.apache.hadoop.hbase.thrift.CallQueue), ThreadFactoryBuilder (com.google.common.util.concurrent.ThreadFactoryBuilder), THBaseThreadPoolExecutor (org.apache.hadoop.hbase.thrift.THBaseThreadPoolExecutor), ThreadPoolExecutor (java.util.concurrent.ThreadPoolExecutor), LinkedBlockingQueue (java.util.concurrent.LinkedBlockingQueue)
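
createExecutor wires a bounded or unbounded work queue into a thread pool whose workers are daemon threads named via ThreadFactoryBuilder, then pre-starts them. CallQueue and THBaseThreadPoolExecutor are HBase-internal, metrics-aware types, so the sketch below substitutes a plain LinkedBlockingQueue and java.util.concurrent.ThreadPoolExecutor to show the same pattern.

import com.google.common.util.concurrent.ThreadFactoryBuilder;

import java.util.concurrent.BlockingQueue;
import java.util.concurrent.LinkedBlockingQueue;
import java.util.concurrent.ThreadPoolExecutor;
import java.util.concurrent.TimeUnit;

public class WorkerPoolSketch {

    // Plain ThreadPoolExecutor standing in for the metrics-aware THBaseThreadPoolExecutor.
    static ThreadPoolExecutor createExecutor(int workerThreads, int maxCallQueueSize) {
        BlockingQueue<Runnable> callQueue = maxCallQueueSize > 0
                ? new LinkedBlockingQueue<>(maxCallQueueSize)   // bounded: execute() rejects tasks once full
                : new LinkedBlockingQueue<>();                  // unbounded: queue grows without limit
        ThreadFactoryBuilder tfb = new ThreadFactoryBuilder();
        tfb.setDaemon(true);
        tfb.setNameFormat("thrift2-worker-%d");
        ThreadPoolExecutor pool = new ThreadPoolExecutor(
                workerThreads, workerThreads, Long.MAX_VALUE, TimeUnit.SECONDS, callQueue, tfb.build());
        pool.prestartAllCoreThreads(); // start workers eagerly instead of on first task
        return pool;
    }

    public static void main(String[] args) {
        ThreadPoolExecutor pool = createExecutor(4, 100);
        pool.execute(() -> System.out.println(Thread.currentThread().getName()));
        pool.shutdown();
    }
}

With the default rejection policy, a full bounded queue makes execute() throw RejectedExecutionException; the HBase CallQueue additionally records queue metrics, which this plain queue does not.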

Example 24 with ThreadFactoryBuilder

Use of com.google.common.util.concurrent.ThreadFactoryBuilder in project hive by apache.

From the class TestMultiSessionsHS2WithLocalClusterSpark, method setUp:

@Before
public void setUp() throws Exception {
    pool = Executors.newFixedThreadPool(PARALLEL_NUMBER, new ThreadFactoryBuilder().setDaemon(true).setNameFormat("Test-Thread-%d").build());
    createConnection();
}
Also used: ThreadFactoryBuilder (com.google.common.util.concurrent.ThreadFactoryBuilder), Before (org.junit.Before)
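
The test setup above is the same idiom on a smaller scale: a daemon pool with recognizable thread names created before each test. A minimal sketch follows; the PARALLEL_NUMBER value and the tearDown method are assumptions added for illustration, since the page only shows setUp.

import com.google.common.util.concurrent.ThreadFactoryBuilder;

import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.TimeUnit;

public class ParallelTestPoolSketch {

    private static final int PARALLEL_NUMBER = 3; // assumption: stands in for the test's constant
    private ExecutorService pool;

    // Mirrors the @Before method: a small daemon pool with recognizable thread names.
    public void setUp() {
        pool = Executors.newFixedThreadPool(PARALLEL_NUMBER,
                new ThreadFactoryBuilder().setDaemon(true).setNameFormat("Test-Thread-%d").build());
    }

    // A plausible @After counterpart (not shown on this page): drain and stop the pool.
    public void tearDown() throws InterruptedException {
        pool.shutdown();
        if (!pool.awaitTermination(10, TimeUnit.SECONDS)) {
            pool.shutdownNow();
        }
    }
}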

Example 25 with ThreadFactoryBuilder

Use of com.google.common.util.concurrent.ThreadFactoryBuilder in project hive by apache.

From the class LlapServiceDriver, method run:

private int run(String[] args) throws Exception {
    LlapOptionsProcessor optionsProcessor = new LlapOptionsProcessor();
    final LlapOptions options = optionsProcessor.processOptions(args);
    final Properties propsDirectOptions = new Properties();
    if (options == null) {
        // help
        return 1;
    }
    // Working directory.
    Path tmpDir = new Path(options.getDirectory());
    if (conf == null) {
        throw new Exception("Cannot load any configuration to run command");
    }
    final long t0 = System.nanoTime();
    final FileSystem fs = FileSystem.get(conf);
    final FileSystem lfs = FileSystem.getLocal(conf).getRawFileSystem();
    int threadCount = Math.max(1, Runtime.getRuntime().availableProcessors() / 2);
    final ExecutorService executor = Executors.newFixedThreadPool(threadCount, new ThreadFactoryBuilder().setNameFormat("llap-pkg-%d").build());
    final CompletionService<Void> asyncRunner = new ExecutorCompletionService<Void>(executor);
    int rc = 0;
    try {
        // needed so that the file is actually loaded into configuration.
        for (String f : NEEDED_CONFIGS) {
            conf.addResource(f);
            if (conf.getResource(f) == null) {
                throw new Exception("Unable to find required config file: " + f);
            }
        }
        for (String f : OPTIONAL_CONFIGS) {
            conf.addResource(f);
        }
        conf.reloadConfiguration();
        populateConfWithLlapProperties(conf, options.getConfig());
        if (options.getName() != null) {
            // update service registry configs - caveat: this has nothing to do with the actual settings
            // as read by the AM
            // if needed, use --hiveconf llap.daemon.service.hosts=@llap0 to dynamically switch between
            // instances
            conf.set(ConfVars.LLAP_DAEMON_SERVICE_HOSTS.varname, "@" + options.getName());
            propsDirectOptions.setProperty(ConfVars.LLAP_DAEMON_SERVICE_HOSTS.varname, "@" + options.getName());
        }
        if (options.getLogger() != null) {
            HiveConf.setVar(conf, ConfVars.LLAP_DAEMON_LOGGER, options.getLogger());
            propsDirectOptions.setProperty(ConfVars.LLAP_DAEMON_LOGGER.varname, options.getLogger());
        }
        boolean isDirect = HiveConf.getBoolVar(conf, HiveConf.ConfVars.LLAP_ALLOCATOR_DIRECT);
        if (options.getSize() != -1) {
            if (options.getCache() != -1) {
                if (HiveConf.getBoolVar(conf, HiveConf.ConfVars.LLAP_ALLOCATOR_MAPPED) == false) {
                    // direct heap allocations need to be safer
                    Preconditions.checkArgument(options.getCache() < options.getSize(), "Cache size (" + LlapUtil.humanReadableByteCount(options.getCache()) + ") has to be smaller" + " than the container sizing (" + LlapUtil.humanReadableByteCount(options.getSize()) + ")");
                } else if (options.getCache() < options.getSize()) {
                    LOG.warn("Note that this might need YARN physical memory monitoring to be turned off " + "(yarn.nodemanager.pmem-check-enabled=false)");
                }
            }
            if (options.getXmx() != -1) {
                Preconditions.checkArgument(options.getXmx() < options.getSize(), "Working memory (Xmx=" + LlapUtil.humanReadableByteCount(options.getXmx()) + ") has to be" + " smaller than the container sizing (" + LlapUtil.humanReadableByteCount(options.getSize()) + ")");
            }
            if (isDirect && !HiveConf.getBoolVar(conf, HiveConf.ConfVars.LLAP_ALLOCATOR_MAPPED)) {
                // direct and not memory mapped
                Preconditions.checkArgument(options.getXmx() + options.getCache() <= options.getSize(), "Working memory (Xmx=" + LlapUtil.humanReadableByteCount(options.getXmx()) + ") + cache size (" + LlapUtil.humanReadableByteCount(options.getCache()) + ") has to be smaller than the container sizing (" + LlapUtil.humanReadableByteCount(options.getSize()) + ")");
            }
        }
        if (options.getExecutors() != -1) {
            conf.setLong(ConfVars.LLAP_DAEMON_NUM_EXECUTORS.varname, options.getExecutors());
            propsDirectOptions.setProperty(ConfVars.LLAP_DAEMON_NUM_EXECUTORS.varname, String.valueOf(options.getExecutors()));
        // TODO: vcpu settings - possibly when DRFA works right
        }
        if (options.getIoThreads() != -1) {
            conf.setLong(ConfVars.LLAP_IO_THREADPOOL_SIZE.varname, options.getIoThreads());
            propsDirectOptions.setProperty(ConfVars.LLAP_IO_THREADPOOL_SIZE.varname, String.valueOf(options.getIoThreads()));
        }
        long cache = -1, xmx = -1;
        if (options.getCache() != -1) {
            cache = options.getCache();
            conf.set(HiveConf.ConfVars.LLAP_IO_MEMORY_MAX_SIZE.varname, Long.toString(cache));
            propsDirectOptions.setProperty(HiveConf.ConfVars.LLAP_IO_MEMORY_MAX_SIZE.varname, Long.toString(cache));
        }
        if (options.getXmx() != -1) {
            // Needs more explanation here
            // Xmx is not the max heap value in JDK8. You need to subtract 50% of the survivor fraction
            // from this, to get actual usable memory before it goes into GC
            xmx = options.getXmx();
            long xmxMb = (xmx / (1024L * 1024L));
            conf.setLong(ConfVars.LLAP_DAEMON_MEMORY_PER_INSTANCE_MB.varname, xmxMb);
            propsDirectOptions.setProperty(ConfVars.LLAP_DAEMON_MEMORY_PER_INSTANCE_MB.varname, String.valueOf(xmxMb));
        }
        long size = options.getSize();
        if (size == -1) {
            long heapSize = xmx;
            if (!isDirect) {
                heapSize += cache;
            }
            size = Math.min((long) (heapSize * 1.2), heapSize + 1024L * 1024 * 1024);
            if (isDirect) {
                size += cache;
            }
        }
        long containerSize = size / (1024 * 1024);
        final long minAlloc = conf.getInt(YarnConfiguration.RM_SCHEDULER_MINIMUM_ALLOCATION_MB, -1);
        Preconditions.checkArgument(containerSize >= minAlloc, "Container size (" + LlapUtil.humanReadableByteCount(options.getSize()) + ") should be greater" + " than minimum allocation(" + LlapUtil.humanReadableByteCount(minAlloc * 1024L * 1024L) + ")");
        conf.setLong(ConfVars.LLAP_DAEMON_YARN_CONTAINER_MB.varname, containerSize);
        propsDirectOptions.setProperty(ConfVars.LLAP_DAEMON_YARN_CONTAINER_MB.varname, String.valueOf(containerSize));
        LOG.info("Memory settings: container memory: {} executor memory: {} cache memory: {}", LlapUtil.humanReadableByteCount(options.getSize()), LlapUtil.humanReadableByteCount(options.getXmx()), LlapUtil.humanReadableByteCount(options.getCache()));
        if (options.getLlapQueueName() != null && !options.getLlapQueueName().isEmpty()) {
            conf.set(ConfVars.LLAP_DAEMON_QUEUE_NAME.varname, options.getLlapQueueName());
            propsDirectOptions.setProperty(ConfVars.LLAP_DAEMON_QUEUE_NAME.varname, options.getLlapQueueName());
        }
        final URL logger = conf.getResource(LlapConstants.LOG4j2_PROPERTIES_FILE);
        if (null == logger) {
            throw new Exception("Unable to find required config file: llap-daemon-log4j2.properties");
        }
        Path home = new Path(System.getenv("HIVE_HOME"));
        Path scriptParent = new Path(new Path(home, "scripts"), "llap");
        Path scripts = new Path(scriptParent, "bin");
        if (!lfs.exists(home)) {
            throw new Exception("Unable to find HIVE_HOME:" + home);
        } else if (!lfs.exists(scripts)) {
            LOG.warn("Unable to find llap scripts:" + scripts);
        }
        final Path libDir = new Path(tmpDir, "lib");
        final Path tezDir = new Path(libDir, "tez");
        final Path udfDir = new Path(libDir, "udfs");
        final Path confPath = new Path(tmpDir, "conf");
        lfs.mkdirs(confPath);
        NamedCallable<Void> downloadTez = new NamedCallable<Void>("downloadTez") {

            @Override
            public Void call() throws Exception {
                synchronized (fs) {
                    String tezLibs = conf.get(TezConfiguration.TEZ_LIB_URIS);
                    if (tezLibs == null) {
                        LOG.warn("Missing tez.lib.uris in tez-site.xml");
                    }
                    if (LOG.isDebugEnabled()) {
                        LOG.debug("Copying tez libs from " + tezLibs);
                    }
                    lfs.mkdirs(tezDir);
                    fs.copyToLocalFile(new Path(tezLibs), new Path(libDir, "tez.tar.gz"));
                    CompressionUtils.unTar(new Path(libDir, "tez.tar.gz").toString(), tezDir.toString(), true);
                    lfs.delete(new Path(libDir, "tez.tar.gz"), false);
                }
                return null;
            }
        };
        NamedCallable<Void> copyLocalJars = new NamedCallable<Void>("copyLocalJars") {

            @Override
            public Void call() throws Exception {
                Class<?>[] dependencies = new Class<?>[] { // llap-common
                LlapDaemonProtocolProtos.class, // llap-tez
                LlapTezUtils.class, // llap-server
                LlapInputFormat.class, // hive-exec
                HiveInputFormat.class, // hive-common (https deps)
                SslSocketConnector.class, // ZK registry
                RegistryUtils.ServiceRecordMarshal.class, // disruptor
                com.lmax.disruptor.RingBuffer.class, // log4j-api
                org.apache.logging.log4j.Logger.class, // log4j-core
                org.apache.logging.log4j.core.Appender.class, // log4j-slf4j
                org.apache.logging.slf4j.Log4jLogger.class, // log4j-1.2-API needed for NDC
                org.apache.log4j.NDC.class };
                for (Class<?> c : dependencies) {
                    Path jarPath = new Path(Utilities.jarFinderGetJar(c));
                    lfs.copyFromLocalFile(jarPath, libDir);
                    if (LOG.isDebugEnabled()) {
                        LOG.debug("Copying " + jarPath + " to " + libDir);
                    }
                }
                return null;
            }
        };
        // copy default aux classes (json/hbase)
        NamedCallable<Void> copyAuxJars = new NamedCallable<Void>("copyAuxJars") {

            @Override
            public Void call() throws Exception {
                for (String className : DEFAULT_AUX_CLASSES) {
                    localizeJarForClass(lfs, libDir, className, false);
                }
                Collection<String> codecs = conf.getStringCollection("io.compression.codecs");
                if (codecs != null) {
                    for (String codecClassName : codecs) {
                        localizeJarForClass(lfs, libDir, codecClassName, false);
                    }
                }
                if (options.getIsHBase()) {
                    try {
                        localizeJarForClass(lfs, libDir, HBASE_SERDE_CLASS, true);
                        // HBase API is convoluted.
                        Job fakeJob = new Job(new JobConf());
                        TableMapReduceUtil.addDependencyJars(fakeJob);
                        Collection<String> hbaseJars = fakeJob.getConfiguration().getStringCollection("tmpjars");
                        for (String jarPath : hbaseJars) {
                            if (!jarPath.isEmpty()) {
                                lfs.copyFromLocalFile(new Path(jarPath), libDir);
                            }
                        }
                    } catch (Throwable t) {
                        String err = "Failed to add HBase jars. Use --auxhbase=false to avoid localizing them";
                        LOG.error(err);
                        System.err.println(err);
                        throw new RuntimeException(t);
                    }
                }
                HashSet<String> auxJars = new HashSet<>();
                // There are many ways to have AUX jars in Hive... sigh
                if (options.getIsHiveAux()) {
                    // Note: we don't add ADDED jars, RELOADABLE jars, etc. That is by design; there are too many ways
                    // to add jars in Hive, some of which are session/etc. specific. Env + conf + arg should be enough.
                    addAuxJarsToSet(auxJars, conf.getAuxJars());
                    addAuxJarsToSet(auxJars, System.getenv("HIVE_AUX_JARS_PATH"));
                    LOG.info("Adding the following aux jars from the environment and configs: " + auxJars);
                }
                addAuxJarsToSet(auxJars, options.getAuxJars());
                for (String jarPath : auxJars) {
                    lfs.copyFromLocalFile(new Path(jarPath), libDir);
                }
                return null;
            }

            private void addAuxJarsToSet(HashSet<String> auxJarSet, String auxJars) {
                if (auxJars != null && !auxJars.isEmpty()) {
                    // TODO: transitive dependencies warning?
                    String[] jarPaths = auxJars.split(",");
                    for (String jarPath : jarPaths) {
                        if (!jarPath.isEmpty()) {
                            auxJarSet.add(jarPath);
                        }
                    }
                }
            }
        };
        NamedCallable<Void> copyUdfJars = new NamedCallable<Void>("copyUdfJars") {

            @Override
            public Void call() throws Exception {
                // UDFs
                final Set<String> allowedUdfs;
                if (HiveConf.getBoolVar(conf, HiveConf.ConfVars.LLAP_ALLOW_PERMANENT_FNS)) {
                    synchronized (fs) {
                        allowedUdfs = downloadPermanentFunctions(conf, udfDir);
                    }
                } else {
                    allowedUdfs = Collections.emptySet();
                }
                PrintWriter udfStream = new PrintWriter(lfs.create(new Path(confPath, StaticPermanentFunctionChecker.PERMANENT_FUNCTIONS_LIST)));
                for (String udfClass : allowedUdfs) {
                    udfStream.println(udfClass);
                }
                udfStream.close();
                return null;
            }
        };
        String java_home;
        if (options.getJavaPath() == null || options.getJavaPath().isEmpty()) {
            java_home = System.getenv("JAVA_HOME");
            String jre_home = System.getProperty("java.home");
            if (java_home == null) {
                java_home = jre_home;
            } else if (!java_home.equals(jre_home)) {
                LOG.warn("Java versions might not match : JAVA_HOME=[{}],process jre=[{}]", java_home, jre_home);
            }
        } else {
            java_home = options.getJavaPath();
        }
        if (java_home == null || java_home.isEmpty()) {
            throw new RuntimeException("Could not determine JAVA_HOME from command line parameters, environment or system properties");
        }
        LOG.info("Using [{}] for JAVA_HOME", java_home);
        NamedCallable<Void> copyConfigs = new NamedCallable<Void>("copyConfigs") {

            @Override
            public Void call() throws Exception {
                // Copy over the mandatory configs for the package.
                for (String f : NEEDED_CONFIGS) {
                    copyConfig(lfs, confPath, f);
                }
                for (String f : OPTIONAL_CONFIGS) {
                    try {
                        copyConfig(lfs, confPath, f);
                    } catch (Throwable t) {
                        LOG.info("Error getting an optional config " + f + "; ignoring: " + t.getMessage());
                    }
                }
                createLlapDaemonConfig(lfs, confPath, conf, propsDirectOptions, options.getConfig());
                setUpLogAndMetricConfigs(lfs, logger, confPath);
                return null;
            }
        };
        @SuppressWarnings("unchecked") final NamedCallable<Void>[] asyncWork = new NamedCallable[] { downloadTez, copyUdfJars, copyLocalJars, copyAuxJars, copyConfigs };
        @SuppressWarnings("unchecked") final Future<Void>[] asyncResults = new Future[asyncWork.length];
        for (int i = 0; i < asyncWork.length; i++) {
            asyncResults[i] = asyncRunner.submit(asyncWork[i]);
        }
        // TODO: need to move from Python to Java for the rest of the script.
        JSONObject configs = createConfigJson(containerSize, cache, xmx, java_home);
        writeConfigJson(tmpDir, lfs, configs);
        if (LOG.isDebugEnabled()) {
            LOG.debug("Config generation took " + (System.nanoTime() - t0) + " ns");
        }
        for (int i = 0; i < asyncWork.length; i++) {
            final long t1 = System.nanoTime();
            asyncResults[i].get();
            final long t2 = System.nanoTime();
            if (LOG.isDebugEnabled()) {
                LOG.debug(asyncWork[i].getName() + " waited for " + (t2 - t1) + " ns");
            }
        }
        if (options.isStarting()) {
            String version = System.getenv("HIVE_VERSION");
            if (version == null || version.isEmpty()) {
                version = DateTime.now().toString("ddMMMyyyy");
            }
            String outputDir = options.getOutput();
            Path packageDir = null;
            if (outputDir == null) {
                outputDir = OUTPUT_DIR_PREFIX + version;
                packageDir = new Path(Paths.get(".").toAbsolutePath().toString(), OUTPUT_DIR_PREFIX + version);
            } else {
                packageDir = new Path(outputDir);
            }
            rc = runPackagePy(args, tmpDir, scriptParent, version, outputDir);
            if (rc == 0) {
                LlapSliderUtils.startCluster(conf, options.getName(), "llap-" + version + ".zip", packageDir, HiveConf.getVar(conf, ConfVars.LLAP_DAEMON_QUEUE_NAME));
            }
        } else {
            rc = 0;
        }
    } finally {
        executor.shutdown();
        lfs.close();
        fs.close();
    }
    if (rc == 0) {
        if (LOG.isDebugEnabled()) {
            LOG.debug("Exiting successfully");
        }
    } else {
        LOG.info("Exiting with rc = " + rc);
    }
    return rc;
}
Also used: FileSystem (org.apache.hadoop.fs.FileSystem), LlapDaemonProtocolProtos (org.apache.hadoop.hive.llap.daemon.rpc.LlapDaemonProtocolProtos), Job (org.apache.hadoop.mapreduce.Job), JobConf (org.apache.hadoop.mapred.JobConf), HashSet (java.util.HashSet), JSONObject (org.json.JSONObject), LlapTezUtils (org.apache.hadoop.hive.llap.tezplugins.LlapTezUtils), SslSocketConnector (org.eclipse.jetty.server.ssl.SslSocketConnector), ExecutorCompletionService (java.util.concurrent.ExecutorCompletionService), Properties (java.util.Properties), Logger (org.slf4j.Logger), URL (java.net.URL), HiveInputFormat (org.apache.hadoop.hive.ql.io.HiveInputFormat), ThreadFactoryBuilder (com.google.common.util.concurrent.ThreadFactoryBuilder), LlapInputFormat (org.apache.hadoop.hive.llap.io.api.impl.LlapInputFormat), LlapOptions (org.apache.hadoop.hive.llap.cli.LlapOptionsProcessor.LlapOptions), PrintWriter (java.io.PrintWriter), Path (org.apache.hadoop.fs.Path), URISyntaxException (java.net.URISyntaxException), JSONException (org.json.JSONException), HiveException (org.apache.hadoop.hive.ql.metadata.HiveException), IOException (java.io.IOException), ExecutorService (java.util.concurrent.ExecutorService), Future (java.util.concurrent.Future)
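
run submits several independent packaging steps (downloading Tez, copying jars and configs) to a pool named llap-pkg-N through an ExecutorCompletionService, continues with other work, then waits for each future and logs how long each wait took. The sketch below reproduces that flow with a simplified NamedTask in place of Hive's NamedCallable; the task names and bodies are placeholders.

import com.google.common.util.concurrent.ThreadFactoryBuilder;

import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.Callable;
import java.util.concurrent.CompletionService;
import java.util.concurrent.ExecutorCompletionService;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;

public class PackagingPoolSketch {

    // Simplified stand-in for Hive's NamedCallable: a task with a label for log messages.
    static class NamedTask implements Callable<Void> {
        final String name;
        final Runnable body;
        NamedTask(String name, Runnable body) { this.name = name; this.body = body; }
        public Void call() { body.run(); return null; }
    }

    public static void main(String[] args) throws Exception {
        int threadCount = Math.max(1, Runtime.getRuntime().availableProcessors() / 2);
        ExecutorService executor = Executors.newFixedThreadPool(threadCount,
                new ThreadFactoryBuilder().setNameFormat("llap-pkg-%d").build());
        CompletionService<Void> asyncRunner = new ExecutorCompletionService<>(executor);

        List<NamedTask> work = new ArrayList<>();
        work.add(new NamedTask("downloadTez", () -> System.out.println("downloading tez libs...")));
        work.add(new NamedTask("copyConfigs", () -> System.out.println("copying configs...")));

        List<Future<Void>> results = new ArrayList<>();
        try {
            for (NamedTask task : work) {
                results.add(asyncRunner.submit(task)); // runs concurrently while other setup continues
            }
            for (int i = 0; i < work.size(); i++) {
                long t1 = System.nanoTime();
                results.get(i).get(); // wait for each task, like the asyncResults[i].get() loop above
                System.out.println(work.get(i).name + " waited for " + (System.nanoTime() - t1) + " ns");
            }
        } finally {
            executor.shutdown();
        }
    }
}

Since the futures are kept and waited on in submission order, the CompletionService's take() queue is never consulted in either the sketch or the original; a plain ExecutorService would serve equally well here.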

Aggregations

ThreadFactoryBuilder (com.google.common.util.concurrent.ThreadFactoryBuilder): 124 usages
ThreadFactory (java.util.concurrent.ThreadFactory): 38 usages
ExecutorService (java.util.concurrent.ExecutorService): 35 usages
IOException (java.io.IOException): 19 usages
ThreadPoolExecutor (java.util.concurrent.ThreadPoolExecutor): 18 usages
Future (java.util.concurrent.Future): 16 usages
ExecutionException (java.util.concurrent.ExecutionException): 14 usages
ArrayList (java.util.ArrayList): 10 usages
AtomicInteger (java.util.concurrent.atomic.AtomicInteger): 10 usages
HashMap (java.util.HashMap): 9 usages
HashSet (java.util.HashSet): 9 usages
Callable (java.util.concurrent.Callable): 9 usages
ScheduledExecutorService (java.util.concurrent.ScheduledExecutorService): 9 usages
Path (org.apache.hadoop.fs.Path): 9 usages
Test (org.junit.Test): 9 usages
LinkedList (java.util.LinkedList): 8 usages
Map (java.util.Map): 8 usages
Before (org.junit.Before): 8 usages
LinkedBlockingQueue (java.util.concurrent.LinkedBlockingQueue): 7 usages
ScheduledThreadPoolExecutor (java.util.concurrent.ScheduledThreadPoolExecutor): 7 usages