Example 66 with ThreadFactoryBuilder

Use of com.google.common.util.concurrent.ThreadFactoryBuilder in project apex-malhar by apache.

In class SimpleKafkaConsumer, method start.

@Override
public void start() {
    monitorException = new AtomicReference<Throwable>(null);
    monitorExceptionCount = new AtomicInteger(0);
    super.start();
    // thread pool for the threads that consume the Kafka data
    kafkaConsumerExecutor = Executors.newCachedThreadPool(new ThreadFactoryBuilder().setNameFormat("kafka-consumer-" + topic + "-%d").build());
    if (metadataRefreshInterval <= 0 || CollectionUtils.isEmpty(kps)) {
        return;
    }
    // background thread to monitor the kafka metadata changes
    metadataRefreshExecutor = Executors.newScheduledThreadPool(1, new ThreadFactoryBuilder().setNameFormat("kafka-consumer-monitor-" + topic + "-%d").setDaemon(true).build());
    // start one monitor thread to monitor the leader broker change and trigger some action
    metadataRefreshExecutor.scheduleAtFixedRate(new MetaDataMonitorTask(this), 0, metadataRefreshInterval, TimeUnit.MILLISECONDS);
}
Also used: AtomicInteger (java.util.concurrent.atomic.AtomicInteger), ThreadFactoryBuilder (com.google.common.util.concurrent.ThreadFactoryBuilder)
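
The example above pairs a cached pool for the consumer threads with a single scheduled daemon thread for metadata monitoring. A minimal standalone sketch of the same ThreadFactoryBuilder pattern follows; the pool names (demo-worker, demo-monitor) and timings are illustrative, not taken from apex-malhar.

import com.google.common.util.concurrent.ThreadFactoryBuilder;

import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.TimeUnit;

public class NamedThreadPoolsSketch {

    public static void main(String[] args) throws InterruptedException {
        // Worker pool: %d is an incrementing counter, so threads are named demo-worker-0, demo-worker-1, ...
        ExecutorService workers = Executors.newCachedThreadPool(
            new ThreadFactoryBuilder().setNameFormat("demo-worker-%d").build());
        // Monitor pool: daemon threads so a forgotten scheduler cannot keep the JVM alive.
        ScheduledExecutorService monitor = Executors.newScheduledThreadPool(1,
            new ThreadFactoryBuilder().setNameFormat("demo-monitor-%d").setDaemon(true).build());

        workers.submit(() -> System.out.println(Thread.currentThread().getName() + " consuming"));
        monitor.scheduleAtFixedRate(
            () -> System.out.println(Thread.currentThread().getName() + " checking metadata"),
            0, 500, TimeUnit.MILLISECONDS);

        Thread.sleep(1200);
        monitor.shutdownNow();
        workers.shutdown();
    }
}

The %d in setNameFormat is filled from a per-factory counter, so a thread dump shows demo-worker-0, demo-worker-1, and so on; marking the monitor threads as daemons means they never block JVM shutdown.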

Example 67 with ThreadFactoryBuilder

Use of com.google.common.util.concurrent.ThreadFactoryBuilder in project apex-malhar by apache.

In class KafkaConsumerWrapper, method start.

/**
 * This method is called in the activate method of the operator
 */
public void start(boolean waitForReplay) {
    this.waitForReplay = waitForReplay;
    isAlive.set(true);
    // create the thread pool for the Kafka consumer threads
    kafkaConsumerExecutor = Executors.newCachedThreadPool(new ThreadFactoryBuilder().setNameFormat("kafka-consumer-%d").build());
    // group list of PartitionMeta by cluster
    Map<String, List<TopicPartition>> consumerAssignment = new HashMap<>();
    Set<AbstractKafkaPartitioner.PartitionMeta> assignments = ownerOperator.assignment();
    for (AbstractKafkaPartitioner.PartitionMeta partitionMeta : assignments) {
        String cluster = partitionMeta.getCluster();
        List<TopicPartition> cAssignment = consumerAssignment.get(cluster);
        if (cAssignment == null) {
            cAssignment = new LinkedList<>();
            consumerAssignment.put(cluster, cAssignment);
        }
        cAssignment.add(new TopicPartition(partitionMeta.getTopic(), partitionMeta.getPartitionId()));
    }
    Map<AbstractKafkaPartitioner.PartitionMeta, Long> currentOffset = ownerOperator.getOffsetTrack();
    // each thread uses one KafkaConsumer to consume from one or more partitions of one or more topics
    for (Map.Entry<String, List<TopicPartition>> e : consumerAssignment.entrySet()) {
        Properties prop = new Properties();
        if (ownerOperator.getConsumerProps() != null) {
            prop.putAll(ownerOperator.getConsumerProps());
        }
        prop.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, e.getKey());
        prop.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "none");
        // never auto commit the offsets
        prop.put(ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG, "false");
        prop.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, ByteArrayDeserializer.class.getName());
        prop.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, ByteArrayDeserializer.class.getName());
        AbstractKafkaInputOperator.InitialOffset initialOffset = AbstractKafkaInputOperator.InitialOffset.valueOf(ownerOperator.getInitialOffset());
        if (initialOffset == AbstractKafkaInputOperator.InitialOffset.APPLICATION_OR_EARLIEST || initialOffset == AbstractKafkaInputOperator.InitialOffset.APPLICATION_OR_LATEST) {
            // commit offsets under the application name when initialOffset is one of the APPLICATION_* values
            prop.put(ConsumerConfig.GROUP_ID_CONFIG, ownerOperator.getApplicationName() + "_Consumer");
        }
        AbstractKafkaConsumer kc = ownerOperator.createConsumer(prop);
        kc.assignPartitions(e.getValue());
        if (logger.isInfoEnabled()) {
            logger.info("Create consumer with properties {} ", Joiner.on(";").withKeyValueSeparator("=").join(prop));
            logger.info("Assign consumer to {}", Joiner.on('#').join(e.getValue()));
        }
        if (currentOffset != null && !currentOffset.isEmpty()) {
            for (TopicPartition tp : e.getValue()) {
                AbstractKafkaPartitioner.PartitionMeta partitionKey = new AbstractKafkaPartitioner.PartitionMeta(e.getKey(), tp.topic(), tp.partition());
                if (currentOffset.containsKey(partitionKey)) {
                    kc.seekToOffset(tp, currentOffset.get(partitionKey));
                }
            }
        }
        consumers.put(e.getKey(), kc);
        Future<?> future = kafkaConsumerExecutor.submit(new ConsumerThread(e.getKey(), kc, this));
        kafkaConsumerThreads.add(future);
    }
}
Also used: HashMap (java.util.HashMap), ConcurrentHashMap (java.util.concurrent.ConcurrentHashMap), Properties (java.util.Properties), TopicPartition (org.apache.kafka.common.TopicPartition), ThreadFactoryBuilder (com.google.common.util.concurrent.ThreadFactoryBuilder), ArrayList (java.util.ArrayList), LinkedList (java.util.LinkedList), List (java.util.List), ByteArrayDeserializer (org.apache.kafka.common.serialization.ByteArrayDeserializer), Map (java.util.Map)
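
The cluster-grouping loop above (get, null check, put) can be written more compactly with Map.computeIfAbsent on Java 8+. A hedged sketch, using a simplified PartitionMeta stand-in rather than the real AbstractKafkaPartitioner.PartitionMeta:

import java.util.Collection;
import java.util.HashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;

import org.apache.kafka.common.TopicPartition;

class GroupPartitionsByCluster {

    // Simplified stand-in for AbstractKafkaPartitioner.PartitionMeta (illustrative only)
    static class PartitionMeta {
        final String cluster;
        final String topic;
        final int partitionId;

        PartitionMeta(String cluster, String topic, int partitionId) {
            this.cluster = cluster;
            this.topic = topic;
            this.partitionId = partitionId;
        }
    }

    static Map<String, List<TopicPartition>> group(Collection<PartitionMeta> assignments) {
        Map<String, List<TopicPartition>> byCluster = new HashMap<>();
        for (PartitionMeta meta : assignments) {
            // computeIfAbsent replaces the explicit get / null-check / put sequence used above
            byCluster.computeIfAbsent(meta.cluster, k -> new LinkedList<>())
                .add(new TopicPartition(meta.topic, meta.partitionId));
        }
        return byCluster;
    }
}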

Example 68 with ThreadFactoryBuilder

Use of com.google.common.util.concurrent.ThreadFactoryBuilder in project hive by apache.

In class Utilities, method getInputSummary.

/**
 * Calculate the total size of input files.
 *
 * @param ctx
 *          the hadoop job context
 * @param work
 *          map reduce job plan
 * @param filter
 *          filter to apply to the input paths before calculating size
 * @return the summary of all the input paths.
 * @throws IOException
 */
public static ContentSummary getInputSummary(final Context ctx, MapWork work, PathFilter filter) throws IOException {
    PerfLogger perfLogger = SessionState.getPerfLogger();
    perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.INPUT_SUMMARY);
    long[] summary = { 0, 0, 0 };
    final Set<Path> pathNeedProcess = new HashSet<>();
    // synchronize so that concurrent calls cannot spin up an uncontrolled number of threads
    synchronized (INPUT_SUMMARY_LOCK) {
        // For each input path, calculate the total size.
        for (Path path : work.getPathToAliases().keySet()) {
            Path p = path;
            if (filter != null && !filter.accept(p)) {
                continue;
            }
            ContentSummary cs = ctx.getCS(path);
            if (cs == null) {
                if (path == null) {
                    continue;
                }
                pathNeedProcess.add(path);
            } else {
                summary[0] += cs.getLength();
                summary[1] += cs.getFileCount();
                summary[2] += cs.getDirectoryCount();
            }
        }
        // Process the paths that still require a name node call
        final Map<String, ContentSummary> resultMap = new ConcurrentHashMap<String, ContentSummary>();
        final ExecutorService executor;
        int numExecutors = getMaxExecutorsForInputListing(ctx.getConf(), pathNeedProcess.size());
        if (numExecutors > 1) {
            LOG.info("Using {} threads for getContentSummary", numExecutors);
            executor = Executors.newFixedThreadPool(numExecutors, new ThreadFactoryBuilder().setDaemon(true).setNameFormat("Get-Input-Summary-%d").build());
        } else {
            executor = null;
        }
        ContentSummary cs = getInputSummaryWithPool(ctx, pathNeedProcess, work, summary, executor);
        perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.INPUT_SUMMARY);
        return cs;
    }
}
Also used: Path (org.apache.hadoop.fs.Path), ContentSummary (org.apache.hadoop.fs.ContentSummary), PerfLogger (org.apache.hadoop.hive.ql.log.PerfLogger), ExecutorService (java.util.concurrent.ExecutorService), ThreadFactoryBuilder (com.google.common.util.concurrent.ThreadFactoryBuilder), ConcurrentHashMap (java.util.concurrent.ConcurrentHashMap), HashSet (java.util.HashSet)
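
getInputSummaryWithPool is not shown above, so the sketch below only illustrates the kind of work the pool is built for: one FileSystem.getContentSummary call per outstanding path, accumulated into the same three counters (length, file count, directory count). This is an assumption-based sketch, not Hive's actual implementation.

import com.google.common.util.concurrent.ThreadFactoryBuilder;

import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.ContentSummary;
import org.apache.hadoop.fs.Path;

class ParallelInputSummarySketch {

    // Sums {length, fileCount, directoryCount} across the given paths on a bounded daemon pool.
    static long[] summarize(Configuration conf, Collection<Path> paths, int threads) throws Exception {
        ExecutorService pool = Executors.newFixedThreadPool(threads,
            new ThreadFactoryBuilder().setDaemon(true).setNameFormat("Input-Summary-%d").build());
        try {
            List<Callable<ContentSummary>> tasks = new ArrayList<>();
            for (Path p : paths) {
                // One name node call per path
                tasks.add(() -> p.getFileSystem(conf).getContentSummary(p));
            }
            long[] totals = new long[3];
            for (Future<ContentSummary> f : pool.invokeAll(tasks)) {
                ContentSummary cs = f.get();
                totals[0] += cs.getLength();
                totals[1] += cs.getFileCount();
                totals[2] += cs.getDirectoryCount();
            }
            return totals;
        } finally {
            pool.shutdown();
        }
    }
}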

Example 69 with ThreadFactoryBuilder

Use of com.google.common.util.concurrent.ThreadFactoryBuilder in project hive by apache.

In class Utilities, method getInputPaths.

/**
 * Computes a list of all input paths needed to compute the given MapWork. All aliases
 * are considered and a merged list of input paths is returned. If any input path points
 * to an empty table or partition a dummy file in the scratch dir is instead created and
 * added to the list. This is needed to avoid special casing the operator pipeline for
 * these cases.
 *
 * @param job JobConf used to run the job
 * @param work MapWork encapsulating the info about the task
 * @param hiveScratchDir The tmp dir used to create dummy files if needed
 * @param ctx Context object
 * @return List of paths to process for the given MapWork
 * @throws Exception
 */
public static List<Path> getInputPaths(JobConf job, MapWork work, Path hiveScratchDir, Context ctx, boolean skipDummy) throws Exception {
    Set<Path> pathsProcessed = new HashSet<Path>();
    List<Path> pathsToAdd = new LinkedList<Path>();
    LockedDriverState lDrvStat = LockedDriverState.getLockedDriverState();
    // AliasToWork contains all the aliases
    Collection<String> aliasToWork = work.getAliasToWork().keySet();
    if (!skipDummy) {
        // Copy to avoid ConcurrentModificationException if a dummy entry is added below.
        aliasToWork = new ArrayList<>(aliasToWork);
    }
    for (String alias : aliasToWork) {
        LOG.info("Processing alias {}", alias);
        // The alias may not have any path
        Collection<Map.Entry<Path, ArrayList<String>>> pathToAliases = work.getPathToAliases().entrySet();
        if (!skipDummy) {
            // Copy to avoid ConcurrentModificationException if a dummy entry is added below.
            pathToAliases = new ArrayList<>(pathToAliases);
        }
        boolean isEmptyTable = true;
        boolean hasLogged = false;
        for (Map.Entry<Path, ArrayList<String>> e : pathToAliases) {
            if (lDrvStat != null && lDrvStat.isAborted()) {
                throw new IOException("Operation is Canceled.");
            }
            Path file = e.getKey();
            List<String> aliases = e.getValue();
            if (aliases.contains(alias)) {
                if (file != null) {
                    isEmptyTable = false;
                } else {
                    LOG.warn("Found a null path for alias {}", alias);
                    continue;
                }
                // processed only once
                if (pathsProcessed.contains(file)) {
                    continue;
                }
                StringInternUtils.internUriStringsInPath(file);
                pathsProcessed.add(file);
                LOG.debug("Adding input file {}", file);
                if (!hasLogged) {
                    hasLogged = true;
                    LOG.info("Adding {} inputs; the first input is {}", work.getPathToAliases().size(), file);
                }
                pathsToAdd.add(file);
            }
        }
        // An empty table or partition still needs a dummy input file so the operator pipeline is not special-cased (see the javadoc above)
        if (isEmptyTable && !skipDummy) {
            pathsToAdd.add(createDummyFileForEmptyTable(job, work, hiveScratchDir, alias));
        }
    }
    List<Path> finalPathsToAdd = new LinkedList<>();
    int numExecutors = getMaxExecutorsForInputListing(job, pathsToAdd.size());
    if (numExecutors > 1) {
        ExecutorService pool = Executors.newFixedThreadPool(numExecutors, new ThreadFactoryBuilder().setDaemon(true).setNameFormat("Get-Input-Paths-%d").build());
        finalPathsToAdd.addAll(getInputPathsWithPool(job, work, hiveScratchDir, ctx, skipDummy, pathsToAdd, pool));
    } else {
        for (final Path path : pathsToAdd) {
            if (lDrvStat != null && lDrvStat.isAborted()) {
                throw new IOException("Operation is Canceled.");
            }
            Path newPath = new GetInputPathsCallable(path, job, work, hiveScratchDir, ctx, skipDummy).call();
            updatePathForMapWork(newPath, work, path);
            finalPathsToAdd.add(newPath);
        }
    }
    return finalPathsToAdd;
}
Also used: Path (org.apache.hadoop.fs.Path), ArrayList (java.util.ArrayList), IOException (java.io.IOException), LinkedList (java.util.LinkedList), ExecutorService (java.util.concurrent.ExecutorService), LockedDriverState (org.apache.hadoop.hive.ql.Driver.LockedDriverState), ThreadFactoryBuilder (com.google.common.util.concurrent.ThreadFactoryBuilder), Map (java.util.Map), LinkedHashMap (java.util.LinkedHashMap), ConcurrentHashMap (java.util.concurrent.ConcurrentHashMap), HashMap (java.util.HashMap), HashSet (java.util.HashSet)
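
This method and getInputSummary above share the same decision: create a named daemon pool only when more than one listing task is pending, and otherwise do the work inline on the calling thread. A generic hedged sketch of that branch, with a placeholder task type and thread-name prefix:

import com.google.common.util.concurrent.ThreadFactoryBuilder;

import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;

class ConditionalParallelSketch {

    static <T> List<T> runAll(List<Callable<T>> tasks, int maxThreads) throws Exception {
        List<T> results = new ArrayList<>();
        if (maxThreads > 1 && tasks.size() > 1) {
            // Parallel branch: bounded pool of named daemon threads, shut down when done.
            ExecutorService pool = Executors.newFixedThreadPool(Math.min(maxThreads, tasks.size()),
                new ThreadFactoryBuilder().setDaemon(true).setNameFormat("Listing-%d").build());
            try {
                List<Future<T>> futures = new ArrayList<>();
                for (Callable<T> task : tasks) {
                    futures.add(pool.submit(task));
                }
                for (Future<T> future : futures) {
                    results.add(future.get());
                }
            } finally {
                pool.shutdown();
            }
        } else {
            // Serial branch: no pool, run each task on the caller's thread.
            for (Callable<T> task : tasks) {
                results.add(task.call());
            }
        }
        return results;
    }
}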

Example 70 with ThreadFactoryBuilder

Use of com.google.common.util.concurrent.ThreadFactoryBuilder in project hive by apache.

In class LlapServiceDriver, method run.

private int run(String[] args) throws Exception {
    LlapOptionsProcessor optionsProcessor = new LlapOptionsProcessor();
    final LlapOptions options = optionsProcessor.processOptions(args);
    final Properties propsDirectOptions = new Properties();
    if (options == null) {
        // no options parsed (e.g. help was requested)
        return 1;
    }
    // Working directory.
    Path tmpDir = new Path(options.getDirectory());
    if (conf == null) {
        throw new Exception("Cannot load any configuration to run command");
    }
    final long t0 = System.nanoTime();
    final FileSystem fs = FileSystem.get(conf);
    final FileSystem lfs = FileSystem.getLocal(conf).getRawFileSystem();
    int threadCount = Math.max(1, Runtime.getRuntime().availableProcessors() / 2);
    final ExecutorService executor = Executors.newFixedThreadPool(threadCount, new ThreadFactoryBuilder().setNameFormat("llap-pkg-%d").build());
    final CompletionService<Void> asyncRunner = new ExecutorCompletionService<Void>(executor);
    int rc = 0;
    try {
        // needed so that the file is actually loaded into configuration.
        for (String f : NEEDED_CONFIGS) {
            conf.addResource(f);
            if (conf.getResource(f) == null) {
                throw new Exception("Unable to find required config file: " + f);
            }
        }
        for (String f : OPTIONAL_CONFIGS) {
            conf.addResource(f);
        }
        conf.reloadConfiguration();
        populateConfWithLlapProperties(conf, options.getConfig());
        if (options.getName() != null) {
            // update service registry configs - caveat: this has nothing to do with the actual settings
            // as read by the AM
            // if needed, use --hiveconf llap.daemon.service.hosts=@llap0 to dynamically switch between
            // instances
            conf.set(ConfVars.LLAP_DAEMON_SERVICE_HOSTS.varname, "@" + options.getName());
            propsDirectOptions.setProperty(ConfVars.LLAP_DAEMON_SERVICE_HOSTS.varname, "@" + options.getName());
        }
        if (options.getLogger() != null) {
            HiveConf.setVar(conf, ConfVars.LLAP_DAEMON_LOGGER, options.getLogger());
            propsDirectOptions.setProperty(ConfVars.LLAP_DAEMON_LOGGER.varname, options.getLogger());
        }
        boolean isDirect = HiveConf.getBoolVar(conf, HiveConf.ConfVars.LLAP_ALLOCATOR_DIRECT);
        if (options.getSize() != -1) {
            if (options.getCache() != -1) {
                if (HiveConf.getBoolVar(conf, HiveConf.ConfVars.LLAP_ALLOCATOR_MAPPED) == false) {
                    // direct heap allocations need to be safer
                    Preconditions.checkArgument(options.getCache() < options.getSize(), "Cache size (" + LlapUtil.humanReadableByteCount(options.getCache()) + ") has to be smaller" + " than the container sizing (" + LlapUtil.humanReadableByteCount(options.getSize()) + ")");
                } else if (options.getCache() < options.getSize()) {
                    LOG.warn("Note that this might need YARN physical memory monitoring to be turned off " + "(yarn.nodemanager.pmem-check-enabled=false)");
                }
            }
            if (options.getXmx() != -1) {
                Preconditions.checkArgument(options.getXmx() < options.getSize(), "Working memory (Xmx=" + LlapUtil.humanReadableByteCount(options.getXmx()) + ") has to be" + " smaller than the container sizing (" + LlapUtil.humanReadableByteCount(options.getSize()) + ")");
            }
            if (isDirect && !HiveConf.getBoolVar(conf, HiveConf.ConfVars.LLAP_ALLOCATOR_MAPPED)) {
                // direct and not memory mapped
                Preconditions.checkArgument(options.getXmx() + options.getCache() <= options.getSize(), "Working memory (Xmx=" + LlapUtil.humanReadableByteCount(options.getXmx()) + ") + cache size (" + LlapUtil.humanReadableByteCount(options.getCache()) + ") has to be smaller than the container sizing (" + LlapUtil.humanReadableByteCount(options.getSize()) + ")");
            }
        }
        if (options.getExecutors() != -1) {
            conf.setLong(ConfVars.LLAP_DAEMON_NUM_EXECUTORS.varname, options.getExecutors());
            propsDirectOptions.setProperty(ConfVars.LLAP_DAEMON_NUM_EXECUTORS.varname, String.valueOf(options.getExecutors()));
        // TODO: vcpu settings - possibly when DRFA works right
        }
        if (options.getIoThreads() != -1) {
            conf.setLong(ConfVars.LLAP_IO_THREADPOOL_SIZE.varname, options.getIoThreads());
            propsDirectOptions.setProperty(ConfVars.LLAP_IO_THREADPOOL_SIZE.varname, String.valueOf(options.getIoThreads()));
        }
        long cache = -1, xmx = -1;
        if (options.getCache() != -1) {
            cache = options.getCache();
            conf.set(HiveConf.ConfVars.LLAP_IO_MEMORY_MAX_SIZE.varname, Long.toString(cache));
            propsDirectOptions.setProperty(HiveConf.ConfVars.LLAP_IO_MEMORY_MAX_SIZE.varname, Long.toString(cache));
        }
        if (options.getXmx() != -1) {
            // Needs more explanation here
            // Xmx is not the max heap value in JDK8. You need to subtract 50% of the survivor fraction
            // from this, to get actual usable memory before it goes into GC
            xmx = options.getXmx();
            long xmxMb = (xmx / (1024L * 1024L));
            conf.setLong(ConfVars.LLAP_DAEMON_MEMORY_PER_INSTANCE_MB.varname, xmxMb);
            propsDirectOptions.setProperty(ConfVars.LLAP_DAEMON_MEMORY_PER_INSTANCE_MB.varname, String.valueOf(xmxMb));
        }
        long size = options.getSize();
        if (size == -1) {
            long heapSize = xmx;
            if (!isDirect) {
                heapSize += cache;
            }
            size = Math.min((long) (heapSize * 1.2), heapSize + 1024L * 1024 * 1024);
            if (isDirect) {
                size += cache;
            }
        }
        long containerSize = size / (1024 * 1024);
        final long minAlloc = conf.getInt(YarnConfiguration.RM_SCHEDULER_MINIMUM_ALLOCATION_MB, -1);
        Preconditions.checkArgument(containerSize >= minAlloc, "Container size (" + LlapUtil.humanReadableByteCount(options.getSize()) + ") should be greater" + " than minimum allocation(" + LlapUtil.humanReadableByteCount(minAlloc * 1024L * 1024L) + ")");
        conf.setLong(ConfVars.LLAP_DAEMON_YARN_CONTAINER_MB.varname, containerSize);
        propsDirectOptions.setProperty(ConfVars.LLAP_DAEMON_YARN_CONTAINER_MB.varname, String.valueOf(containerSize));
        LOG.info("Memory settings: container memory: {} executor memory: {} cache memory: {}", LlapUtil.humanReadableByteCount(options.getSize()), LlapUtil.humanReadableByteCount(options.getXmx()), LlapUtil.humanReadableByteCount(options.getCache()));
        if (options.getLlapQueueName() != null && !options.getLlapQueueName().isEmpty()) {
            conf.set(ConfVars.LLAP_DAEMON_QUEUE_NAME.varname, options.getLlapQueueName());
            propsDirectOptions.setProperty(ConfVars.LLAP_DAEMON_QUEUE_NAME.varname, options.getLlapQueueName());
        }
        final URL logger = conf.getResource(LlapConstants.LOG4j2_PROPERTIES_FILE);
        if (null == logger) {
            throw new Exception("Unable to find required config file: llap-daemon-log4j2.properties");
        }
        Path home = new Path(System.getenv("HIVE_HOME"));
        Path scriptParent = new Path(new Path(home, "scripts"), "llap");
        Path scripts = new Path(scriptParent, "bin");
        if (!lfs.exists(home)) {
            throw new Exception("Unable to find HIVE_HOME:" + home);
        } else if (!lfs.exists(scripts)) {
            LOG.warn("Unable to find llap scripts:" + scripts);
        }
        final Path libDir = new Path(tmpDir, "lib");
        final Path tezDir = new Path(libDir, "tez");
        final Path udfDir = new Path(libDir, "udfs");
        final Path confPath = new Path(tmpDir, "conf");
        if (!lfs.mkdirs(confPath)) {
            LOG.warn("mkdirs for " + confPath + " returned false");
        }
        if (!lfs.mkdirs(tezDir)) {
            LOG.warn("mkdirs for " + tezDir + " returned false");
        }
        if (!lfs.mkdirs(udfDir)) {
            LOG.warn("mkdirs for " + udfDir + " returned false");
        }
        NamedCallable<Void> downloadTez = new NamedCallable<Void>("downloadTez") {

            @Override
            public Void call() throws Exception {
                synchronized (fs) {
                    String tezLibs = conf.get(TezConfiguration.TEZ_LIB_URIS);
                    if (tezLibs == null) {
                        LOG.warn("Missing tez.lib.uris in tez-site.xml");
                    }
                    if (LOG.isDebugEnabled()) {
                        LOG.debug("Copying tez libs from " + tezLibs);
                    }
                    lfs.mkdirs(tezDir);
                    fs.copyToLocalFile(new Path(tezLibs), new Path(libDir, "tez.tar.gz"));
                    CompressionUtils.unTar(new Path(libDir, "tez.tar.gz").toString(), tezDir.toString(), true);
                    lfs.delete(new Path(libDir, "tez.tar.gz"), false);
                }
                return null;
            }
        };
        NamedCallable<Void> copyLocalJars = new NamedCallable<Void>("copyLocalJars") {

            @Override
            public Void call() throws Exception {
                Class<?>[] dependencies = new Class<?>[] {
                    LlapDaemonProtocolProtos.class, // llap-common
                    LlapTezUtils.class, // llap-tez
                    LlapInputFormat.class, // llap-server
                    HiveInputFormat.class, // hive-exec
                    SslContextFactory.class, // hive-common (https deps)
                    Rule.class, // Jetty rewrite class
                    RegistryUtils.ServiceRecordMarshal.class, // ZK registry
                    com.lmax.disruptor.RingBuffer.class, // disruptor
                    org.apache.logging.log4j.Logger.class, // log4j-api
                    org.apache.logging.log4j.core.Appender.class, // log4j-core
                    org.apache.logging.slf4j.Log4jLogger.class, // log4j-slf4j
                    org.apache.log4j.config.Log4j1ConfigurationFactory.class, // log4j-1.2-API needed for NDC
                    io.netty.util.NetUtil.class, // netty4
                    org.jboss.netty.util.NetUtil.class // netty3
                };
                for (Class<?> c : dependencies) {
                    Path jarPath = new Path(Utilities.jarFinderGetJar(c));
                    lfs.copyFromLocalFile(jarPath, libDir);
                    if (LOG.isDebugEnabled()) {
                        LOG.debug("Copying " + jarPath + " to " + libDir);
                    }
                }
                return null;
            }
        };
        // copy default aux classes (json/hbase)
        NamedCallable<Void> copyAuxJars = new NamedCallable<Void>("copyAuxJars") {

            @Override
            public Void call() throws Exception {
                for (String className : DEFAULT_AUX_CLASSES) {
                    localizeJarForClass(lfs, libDir, className, false);
                }
                Collection<String> codecs = conf.getStringCollection("io.compression.codecs");
                if (codecs != null) {
                    for (String codecClassName : codecs) {
                        localizeJarForClass(lfs, libDir, codecClassName, false);
                    }
                }
                if (options.getIsHBase()) {
                    try {
                        localizeJarForClass(lfs, libDir, HBASE_SERDE_CLASS, true);
                        // HBase API is convoluted.
                        Job fakeJob = new Job(new JobConf());
                        TableMapReduceUtil.addDependencyJars(fakeJob);
                        Collection<String> hbaseJars = fakeJob.getConfiguration().getStringCollection("tmpjars");
                        for (String jarPath : hbaseJars) {
                            if (!jarPath.isEmpty()) {
                                lfs.copyFromLocalFile(new Path(jarPath), libDir);
                            }
                        }
                    } catch (Throwable t) {
                        String err = "Failed to add HBase jars. Use --auxhbase=false to avoid localizing them";
                        LOG.error(err);
                        System.err.println(err);
                        throw new RuntimeException(t);
                    }
                }
                HashSet<String> auxJars = new HashSet<>();
                // There are many ways to have AUX jars in Hive... sigh
                if (options.getIsHiveAux()) {
                    // Note: we don't add ADDED jars, RELOADABLE jars, etc. That is by design; there are too many ways
                    // to add jars in Hive, some of which are session/etc. specific. Env + conf + arg should be enough.
                    addAuxJarsToSet(auxJars, conf.getAuxJars(), ",");
                    addAuxJarsToSet(auxJars, System.getenv("HIVE_AUX_JARS_PATH"), ":");
                    LOG.info("Adding the following aux jars from the environment and configs: " + auxJars);
                }
                addAuxJarsToSet(auxJars, options.getAuxJars(), ",");
                for (String jarPath : auxJars) {
                    lfs.copyFromLocalFile(new Path(jarPath), libDir);
                }
                return null;
            }

            private void addAuxJarsToSet(HashSet<String> auxJarSet, String auxJars, String delimiter) {
                if (auxJars != null && !auxJars.isEmpty()) {
                    // TODO: transitive dependencies warning?
                    String[] jarPaths = auxJars.split(delimiter);
                    for (String jarPath : jarPaths) {
                        if (!jarPath.isEmpty()) {
                            auxJarSet.add(jarPath);
                        }
                    }
                }
            }
        };
        NamedCallable<Void> copyUdfJars = new NamedCallable<Void>("copyUdfJars") {

            @Override
            public Void call() throws Exception {
                // UDFs
                final Set<String> allowedUdfs;
                if (HiveConf.getBoolVar(conf, HiveConf.ConfVars.LLAP_ALLOW_PERMANENT_FNS)) {
                    synchronized (fs) {
                        allowedUdfs = downloadPermanentFunctions(conf, udfDir);
                    }
                } else {
                    allowedUdfs = Collections.emptySet();
                }
                PrintWriter udfStream = new PrintWriter(lfs.create(new Path(confPath, StaticPermanentFunctionChecker.PERMANENT_FUNCTIONS_LIST)));
                for (String udfClass : allowedUdfs) {
                    udfStream.println(udfClass);
                }
                udfStream.close();
                return null;
            }
        };
        String java_home;
        if (options.getJavaPath() == null || options.getJavaPath().isEmpty()) {
            java_home = System.getenv("JAVA_HOME");
            String jre_home = System.getProperty("java.home");
            if (java_home == null) {
                java_home = jre_home;
            } else if (!java_home.equals(jre_home)) {
                LOG.warn("Java versions might not match : JAVA_HOME=[{}],process jre=[{}]", java_home, jre_home);
            }
        } else {
            java_home = options.getJavaPath();
        }
        if (java_home == null || java_home.isEmpty()) {
            throw new RuntimeException("Could not determine JAVA_HOME from command line parameters, environment or system properties");
        }
        LOG.info("Using [{}] for JAVA_HOME", java_home);
        NamedCallable<Void> copyConfigs = new NamedCallable<Void>("copyConfigs") {

            @Override
            public Void call() throws Exception {
                // Copy over the mandatory configs for the package.
                for (String f : NEEDED_CONFIGS) {
                    copyConfig(lfs, confPath, f);
                }
                for (String f : OPTIONAL_CONFIGS) {
                    try {
                        copyConfig(lfs, confPath, f);
                    } catch (Throwable t) {
                        LOG.info("Error getting an optional config " + f + "; ignoring: " + t.getMessage());
                    }
                }
                createLlapDaemonConfig(lfs, confPath, conf, propsDirectOptions, options.getConfig());
                setUpLogAndMetricConfigs(lfs, logger, confPath);
                return null;
            }
        };
        @SuppressWarnings("unchecked") final NamedCallable<Void>[] asyncWork = new NamedCallable[] { downloadTez, copyUdfJars, copyLocalJars, copyAuxJars, copyConfigs };
        @SuppressWarnings("unchecked") final Future<Void>[] asyncResults = new Future[asyncWork.length];
        for (int i = 0; i < asyncWork.length; i++) {
            asyncResults[i] = asyncRunner.submit(asyncWork[i]);
        }
        // TODO: need to move from Python to Java for the rest of the script.
        JSONObject configs = createConfigJson(containerSize, cache, xmx, java_home);
        writeConfigJson(tmpDir, lfs, configs);
        if (LOG.isDebugEnabled()) {
            LOG.debug("Config generation took " + (System.nanoTime() - t0) + " ns");
        }
        for (int i = 0; i < asyncWork.length; i++) {
            final long t1 = System.nanoTime();
            asyncResults[i].get();
            final long t2 = System.nanoTime();
            if (LOG.isDebugEnabled()) {
                LOG.debug(asyncWork[i].getName() + " waited for " + (t2 - t1) + " ns");
            }
        }
        if (options.isStarting()) {
            String version = System.getenv("HIVE_VERSION");
            if (version == null || version.isEmpty()) {
                version = DateTime.now().toString("ddMMMyyyy");
            }
            String outputDir = options.getOutput();
            Path packageDir = null;
            if (outputDir == null) {
                outputDir = OUTPUT_DIR_PREFIX + version;
                packageDir = new Path(Paths.get(".").toAbsolutePath().toString(), OUTPUT_DIR_PREFIX + version);
            } else {
                packageDir = new Path(outputDir);
            }
            rc = runPackagePy(args, tmpDir, scriptParent, version, outputDir);
            if (rc == 0) {
                LlapSliderUtils.startCluster(conf, options.getName(), "llap-" + version + ".zip", packageDir, HiveConf.getVar(conf, ConfVars.LLAP_DAEMON_QUEUE_NAME));
            }
        } else {
            rc = 0;
        }
    } finally {
        executor.shutdown();
        lfs.close();
        fs.close();
    }
    if (rc == 0) {
        if (LOG.isDebugEnabled()) {
            LOG.debug("Exiting successfully");
        }
    } else {
        LOG.info("Exiting with rc = " + rc);
    }
    return rc;
}
Also used: FileSystem (org.apache.hadoop.fs.FileSystem), LlapDaemonProtocolProtos (org.apache.hadoop.hive.llap.daemon.rpc.LlapDaemonProtocolProtos), Job (org.apache.hadoop.mapreduce.Job), JobConf (org.apache.hadoop.mapred.JobConf), HashSet (java.util.HashSet), JSONObject (org.codehaus.jettison.json.JSONObject), Rule (org.eclipse.jetty.rewrite.handler.Rule), LlapTezUtils (org.apache.hadoop.hive.llap.tezplugins.LlapTezUtils), ExecutorCompletionService (java.util.concurrent.ExecutorCompletionService), Properties (java.util.Properties), Logger (org.slf4j.Logger), URL (java.net.URL), HiveInputFormat (org.apache.hadoop.hive.ql.io.HiveInputFormat), SslContextFactory (org.eclipse.jetty.util.ssl.SslContextFactory), ThreadFactoryBuilder (com.google.common.util.concurrent.ThreadFactoryBuilder), LlapInputFormat (org.apache.hadoop.hive.llap.io.api.impl.LlapInputFormat), LlapOptions (org.apache.hadoop.hive.llap.cli.LlapOptionsProcessor.LlapOptions), PrintWriter (java.io.PrintWriter), Path (org.apache.hadoop.fs.Path), URISyntaxException (java.net.URISyntaxException), HiveException (org.apache.hadoop.hive.ql.metadata.HiveException), IOException (java.io.IOException), JSONException (org.codehaus.jettison.json.JSONException), ExecutorService (java.util.concurrent.ExecutorService), Future (java.util.concurrent.Future)
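
The driver above wraps its fixed pool in an ExecutorCompletionService, submits the packaging callables up front, and then waits on the returned futures in submission order. A minimal hedged sketch of the submit-then-drain pattern, with placeholder task names; unlike the driver, it consumes results in completion order via take():

import com.google.common.util.concurrent.ThreadFactoryBuilder;

import java.util.Arrays;
import java.util.List;
import java.util.concurrent.Callable;
import java.util.concurrent.CompletionService;
import java.util.concurrent.ExecutorCompletionService;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;

public class PackagingTasksSketch {

    public static void main(String[] args) throws Exception {
        int threads = Math.max(1, Runtime.getRuntime().availableProcessors() / 2);
        ExecutorService executor = Executors.newFixedThreadPool(threads,
            new ThreadFactoryBuilder().setNameFormat("pkg-%d").build());
        CompletionService<String> asyncRunner = new ExecutorCompletionService<>(executor);
        try {
            // Placeholder tasks standing in for downloadTez, copyLocalJars, copyAuxJars, copyConfigs
            List<Callable<String>> work = Arrays.asList(
                () -> "downloadTez",
                () -> "copyLocalJars",
                () -> "copyConfigs");
            for (Callable<String> task : work) {
                asyncRunner.submit(task);
            }
            // Drain results as they complete, regardless of submission order.
            for (int i = 0; i < work.size(); i++) {
                System.out.println("finished: " + asyncRunner.take().get());
            }
        } finally {
            executor.shutdown();
        }
    }
}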

Aggregations

ThreadFactoryBuilder (com.google.common.util.concurrent.ThreadFactoryBuilder): 143
ExecutorService (java.util.concurrent.ExecutorService): 49
ThreadFactory (java.util.concurrent.ThreadFactory): 46
IOException (java.io.IOException): 23
Future (java.util.concurrent.Future): 19
ThreadPoolExecutor (java.util.concurrent.ThreadPoolExecutor): 19
ExecutionException (java.util.concurrent.ExecutionException): 17
ArrayList (java.util.ArrayList): 15
Callable (java.util.concurrent.Callable): 12
AtomicInteger (java.util.concurrent.atomic.AtomicInteger): 12
HashMap (java.util.HashMap): 11
Path (org.apache.hadoop.fs.Path): 11
LinkedList (java.util.LinkedList): 10
Map (java.util.Map): 10
HashSet (java.util.HashSet): 9
ConcurrentHashMap (java.util.concurrent.ConcurrentHashMap): 9
ScheduledExecutorService (java.util.concurrent.ScheduledExecutorService): 9
Test (org.junit.Test): 9
LinkedBlockingQueue (java.util.concurrent.LinkedBlockingQueue): 8
Before (org.junit.Before): 8