
Example 16 with ThreadFactoryBuilder

use of com.google.common.util.concurrent.ThreadFactoryBuilder in project hive by apache.

the class Utilities method getInputSummary.

/**
   * Calculate the total size of input files.
   *
   * @param ctx
   *          the hadoop job context
   * @param work
   *          map reduce job plan
   * @param filter
   *          filter to apply to the input paths before calculating size
   * @return the summary of all the input paths.
   * @throws IOException
   */
public static ContentSummary getInputSummary(final Context ctx, MapWork work, PathFilter filter) throws IOException {
    PerfLogger perfLogger = SessionState.getPerfLogger();
    perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.INPUT_SUMMARY);
    long[] summary = { 0, 0, 0 };
    final Set<Path> pathNeedProcess = new HashSet<>();
        // Synchronize so that concurrent calls cannot let the number of threads grow out of control.
    synchronized (INPUT_SUMMARY_LOCK) {
        // For each input path, calculate the total size.
        for (Path path : work.getPathToAliases().keySet()) {
            Path p = path;
            if (filter != null && !filter.accept(p)) {
                continue;
            }
            ContentSummary cs = ctx.getCS(path);
            if (cs == null) {
                if (path == null) {
                    continue;
                }
                pathNeedProcess.add(path);
            } else {
                summary[0] += cs.getLength();
                summary[1] += cs.getFileCount();
                summary[2] += cs.getDirectoryCount();
            }
        }
        // Handle the paths for which a name node call is needed.
        final Map<String, ContentSummary> resultMap = new ConcurrentHashMap<String, ContentSummary>();
        ArrayList<Future<?>> results = new ArrayList<Future<?>>();
        final ExecutorService executor;
        int numExecutors = getMaxExecutorsForInputListing(ctx.getConf(), pathNeedProcess.size());
        if (numExecutors > 1) {
            LOG.info("Using " + numExecutors + " threads for getContentSummary");
            executor = Executors.newFixedThreadPool(numExecutors, new ThreadFactoryBuilder().setDaemon(true).setNameFormat("Get-Input-Summary-%d").build());
        } else {
            executor = null;
        }
        HiveInterruptCallback interrup = HiveInterruptUtils.add(new HiveInterruptCallback() {

            @Override
            public void interrupt() {
                for (Path path : pathNeedProcess) {
                    try {
                        path.getFileSystem(ctx.getConf()).close();
                    } catch (IOException ignore) {
                        LOG.debug("Failed to close filesystem", ignore);
                    }
                }
                if (executor != null) {
                    executor.shutdownNow();
                }
            }
        });
        try {
            Configuration conf = ctx.getConf();
            JobConf jobConf = new JobConf(conf);
            for (Path path : pathNeedProcess) {
                final Path p = path;
                final String pathStr = path.toString();
                // All threads share the same Configuration and JobConf, on the
                // assumption that both are thread safe as long as only read
                // operations are performed. Hadoop's javadoc does not state this,
                // but the source code clearly shows that an effort was made, so we
                // believe the assumption holds. Revisit this code if the assumption
                // turns out to be incorrect.
                final Configuration myConf = conf;
                final JobConf myJobConf = jobConf;
                final Map<String, Operator<?>> aliasToWork = work.getAliasToWork();
                final Map<Path, ArrayList<String>> pathToAlias = work.getPathToAliases();
                final PartitionDesc partDesc = work.getPathToPartitionInfo().get(p);
                Runnable r = new Runnable() {

                    @Override
                    public void run() {
                        try {
                            Class<? extends InputFormat> inputFormatCls = partDesc.getInputFileFormatClass();
                            InputFormat inputFormatObj = HiveInputFormat.getInputFormatFromCache(inputFormatCls, myJobConf);
                            if (inputFormatObj instanceof ContentSummaryInputFormat) {
                                ContentSummaryInputFormat cs = (ContentSummaryInputFormat) inputFormatObj;
                                resultMap.put(pathStr, cs.getContentSummary(p, myJobConf));
                                return;
                            }
                            String metaTableStorage = null;
                            if (partDesc.getTableDesc() != null && partDesc.getTableDesc().getProperties() != null) {
                                metaTableStorage = partDesc.getTableDesc().getProperties().getProperty(hive_metastoreConstants.META_TABLE_STORAGE, null);
                            }
                            if (partDesc.getProperties() != null) {
                                metaTableStorage = partDesc.getProperties().getProperty(hive_metastoreConstants.META_TABLE_STORAGE, metaTableStorage);
                            }
                            HiveStorageHandler handler = HiveUtils.getStorageHandler(myConf, metaTableStorage);
                            if (handler instanceof InputEstimator) {
                                long total = 0;
                                TableDesc tableDesc = partDesc.getTableDesc();
                                InputEstimator estimator = (InputEstimator) handler;
                                for (String alias : HiveFileFormatUtils.doGetAliasesFromPath(pathToAlias, p)) {
                                    JobConf jobConf = new JobConf(myJobConf);
                                    TableScanOperator scanOp = (TableScanOperator) aliasToWork.get(alias);
                                    Utilities.setColumnNameList(jobConf, scanOp, true);
                                    Utilities.setColumnTypeList(jobConf, scanOp, true);
                                    PlanUtils.configureInputJobPropertiesForStorageHandler(tableDesc);
                                    Utilities.copyTableJobPropertiesToConf(tableDesc, jobConf);
                                    total += estimator.estimate(jobConf, scanOp, -1).getTotalLength();
                                }
                                resultMap.put(pathStr, new ContentSummary(total, -1, -1));
                            } else {
                                // TODO: the summary should be nullified for non-native tables
                                // so they are not selected as map-join targets
                                FileSystem fs = p.getFileSystem(myConf);
                                resultMap.put(pathStr, fs.getContentSummary(p));
                            }
                        } catch (Exception e) {
                            // We safely ignore this exception for summary data. We don't
                            // update the cache, to avoid polluting other usages. In the
                            // worst case the IOException simply recurs on the next
                            // getInputSummary() call, which is acceptable because
                            // IOException is not the common case.
                            LOG.info("Cannot get size of " + pathStr + ". Safely ignored.", e);
                        }
                    }
                };
                if (executor == null) {
                    r.run();
                } else {
                    Future<?> result = executor.submit(r);
                    results.add(result);
                }
            }
            if (executor != null) {
                for (Future<?> result : results) {
                    boolean executorDone = false;
                    do {
                        try {
                            result.get();
                            executorDone = true;
                        } catch (InterruptedException e) {
                            LOG.info("Interrupted when waiting threads: ", e);
                            Thread.currentThread().interrupt();
                            break;
                        } catch (ExecutionException e) {
                            throw new IOException(e);
                        }
                    } while (!executorDone);
                }
                executor.shutdown();
            }
            HiveInterruptUtils.checkInterrupted();
            for (Map.Entry<String, ContentSummary> entry : resultMap.entrySet()) {
                ContentSummary cs = entry.getValue();
                summary[0] += cs.getLength();
                summary[1] += cs.getFileCount();
                summary[2] += cs.getDirectoryCount();
                ctx.addCS(entry.getKey(), cs);
                LOG.info("Cache Content Summary for " + entry.getKey() + " length: " + cs.getLength() + " file count: " + cs.getFileCount() + " directory count: " + cs.getDirectoryCount());
            }
            perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.INPUT_SUMMARY);
            return new ContentSummary(summary[0], summary[1], summary[2]);
        } finally {
            HiveInterruptUtils.remove(interrup);
        }
    }
}
Also used : Configuration(org.apache.hadoop.conf.Configuration) PerfLogger(org.apache.hadoop.hive.ql.log.PerfLogger) ArrayList(java.util.ArrayList) ContentSummaryInputFormat(org.apache.hadoop.hive.ql.io.ContentSummaryInputFormat) FileSystem(org.apache.hadoop.fs.FileSystem) ThreadFactoryBuilder(com.google.common.util.concurrent.ThreadFactoryBuilder) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) ExecutionException(java.util.concurrent.ExecutionException) JobConf(org.apache.hadoop.mapred.JobConf) HashSet(java.util.HashSet) Path(org.apache.hadoop.fs.Path) InputEstimator(org.apache.hadoop.hive.ql.metadata.InputEstimator) HiveStorageHandler(org.apache.hadoop.hive.ql.metadata.HiveStorageHandler) HiveInterruptCallback(org.apache.hadoop.hive.common.HiveInterruptCallback) IOException(java.io.IOException) SQLFeatureNotSupportedException(java.sql.SQLFeatureNotSupportedException) SQLTransientException(java.sql.SQLTransientException) SQLException(java.sql.SQLException) IOException(java.io.IOException) ExecutionException(java.util.concurrent.ExecutionException) SerDeException(org.apache.hadoop.hive.serde2.SerDeException) SemanticException(org.apache.hadoop.hive.ql.parse.SemanticException) EOFException(java.io.EOFException) FileNotFoundException(java.io.FileNotFoundException) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) SequenceFileInputFormat(org.apache.hadoop.mapred.SequenceFileInputFormat) ReworkMapredInputFormat(org.apache.hadoop.hive.ql.io.ReworkMapredInputFormat) ContentSummaryInputFormat(org.apache.hadoop.hive.ql.io.ContentSummaryInputFormat) InputFormat(org.apache.hadoop.mapred.InputFormat) FileInputFormat(org.apache.hadoop.mapred.FileInputFormat) OneNullRowInputFormat(org.apache.hadoop.hive.ql.io.OneNullRowInputFormat) HiveInputFormat(org.apache.hadoop.hive.ql.io.HiveInputFormat) ContentSummary(org.apache.hadoop.fs.ContentSummary) ExecutorService(java.util.concurrent.ExecutorService) Future(java.util.concurrent.Future) PartitionDesc(org.apache.hadoop.hive.ql.plan.PartitionDesc) TableDesc(org.apache.hadoop.hive.ql.plan.TableDesc) Map(java.util.Map) LinkedHashMap(java.util.LinkedHashMap) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) HashMap(java.util.HashMap)
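
The core ThreadFactoryBuilder pattern in this example — a fixed pool of daemon threads with a recognizable name format, plus one Future per submitted path — can be isolated into a short, self-contained sketch. This is an illustration, not Hive code; the class name and task bodies are made up:

import com.google.common.util.concurrent.ThreadFactoryBuilder;

import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;

public class InputSummaryPoolSketch {

    public static void main(String[] args) throws Exception {
        // Daemon threads cannot keep the JVM alive if a summary call hangs,
        // and the name format makes the workers easy to spot in thread dumps.
        ExecutorService executor = Executors.newFixedThreadPool(4,
                new ThreadFactoryBuilder().setDaemon(true).setNameFormat("Get-Input-Summary-%d").build());
        List<Future<?>> results = new ArrayList<>();
        for (int i = 0; i < 8; i++) {
            final int id = i;
            results.add(executor.submit(
                    () -> System.out.println(Thread.currentThread().getName() + " processed path " + id)));
        }
        for (Future<?> result : results) {
            // get() rethrows task failures, much as getInputSummary surfaces them via ExecutionException.
            result.get();
        }
        executor.shutdown();
    }
}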

Example 17 with ThreadFactoryBuilder

use of com.google.common.util.concurrent.ThreadFactoryBuilder in project hive by apache.

the class ATSHook method setupAtsExecutor.

private static void setupAtsExecutor(HiveConf conf) {
    synchronized (LOCK) {
        if (executor == null) {
            // The call to ATS appears to block indefinitely, blocking the ATS thread while
            // the hook continues to submit work to the ExecutorService with each query.
            // Over time the queued items can cause OOM as the HookContext seems to contain
            // some items which use a lot of memory.
            // Prevent this situation by creating the executor with bounded capacity -
            // the event will not be sent to ATS if there are too many outstanding work submissions.
            int queueCapacity = conf.getIntVar(HiveConf.ConfVars.ATSHOOKQUEUECAPACITY);
            // Executor to create the ATS events.
            // This can use significant resources and should not be done on the main query thread.
            LOG.info("Creating ATS executor queue with capacity " + queueCapacity);
            BlockingQueue<Runnable> queue = new LinkedBlockingQueue<Runnable>(queueCapacity);
            ThreadFactory threadFactory = new ThreadFactoryBuilder().setDaemon(true).setNameFormat("ATS Logger %d").build();
            executor = new ThreadPoolExecutor(1, 1, 0, TimeUnit.MILLISECONDS, queue, threadFactory);
            // Create a separate thread to send the events.
            // Keep it separate from event creation in case the send blocks.
            BlockingQueue<Runnable> senderQueue = new LinkedBlockingQueue<Runnable>(queueCapacity);
            senderExecutor = new ThreadPoolExecutor(1, 1, 0, TimeUnit.MILLISECONDS, senderQueue, threadFactory);
            YarnConfiguration yarnConf = new YarnConfiguration();
            timelineClient = TimelineClient.createTimelineClient();
            timelineClient.init(yarnConf);
            timelineClient.start();
            ShutdownHookManager.addShutdownHook(new Runnable() {

                @Override
                public void run() {
                    try {
                        executor.shutdown();
                        executor.awaitTermination(WAIT_TIME, TimeUnit.SECONDS);
                        executor = null;
                    } catch (InterruptedException ie) {
                    /* ignore */
                    }
                    timelineClient.stop();
                }
            });
        }
    }
}
Also used : ThreadFactory(java.util.concurrent.ThreadFactory) YarnConfiguration(org.apache.hadoop.yarn.conf.YarnConfiguration) ThreadFactoryBuilder(com.google.common.util.concurrent.ThreadFactoryBuilder) ThreadPoolExecutor(java.util.concurrent.ThreadPoolExecutor) LinkedBlockingQueue(java.util.concurrent.LinkedBlockingQueue)
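
The bounded-capacity idea is the reason ATSHook builds a ThreadPoolExecutor by hand rather than calling Executors.newSingleThreadExecutor(), whose work queue is unbounded. The sketch below demonstrates the load-shedding behavior in isolation; it assumes the default AbortPolicy is in effect (as it is above, since no RejectedExecutionHandler is passed), and the class name and sleep are illustrative:

import com.google.common.util.concurrent.ThreadFactoryBuilder;

import java.util.concurrent.LinkedBlockingQueue;
import java.util.concurrent.RejectedExecutionException;
import java.util.concurrent.ThreadPoolExecutor;
import java.util.concurrent.TimeUnit;

public class BoundedExecutorSketch {

    public static void main(String[] args) throws InterruptedException {
        // One worker thread; at most 2 tasks may wait in the queue.
        ThreadPoolExecutor executor = new ThreadPoolExecutor(1, 1, 0, TimeUnit.MILLISECONDS,
                new LinkedBlockingQueue<Runnable>(2),
                new ThreadFactoryBuilder().setDaemon(true).setNameFormat("ATS Logger %d").build());
        for (int i = 0; i < 6; i++) {
            try {
                executor.execute(() -> {
                    try {
                        Thread.sleep(100); // simulate a slow ATS call
                    } catch (InterruptedException ignored) {
                        Thread.currentThread().interrupt();
                    }
                });
            } catch (RejectedExecutionException e) {
                // With the queue full, the submission is rejected instead of queued,
                // which is how the hook avoids unbounded memory growth: the event is dropped.
                System.out.println("dropped submission " + i);
            }
        }
        executor.shutdown();
        executor.awaitTermination(5, TimeUnit.SECONDS);
    }
}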

Example 18 with ThreadFactoryBuilder

use of com.google.common.util.concurrent.ThreadFactoryBuilder in project hive by apache.

the class Hive method trashFiles.

/**
   * Trashes or deletes all files under a directory. Leaves the directory as is.
   * @param fs FileSystem to use
   * @param statuses fileStatuses of files to be deleted
   * @param conf hive configuration
   * @return true if deletion successful
   * @throws IOException
   */
public static boolean trashFiles(final FileSystem fs, final FileStatus[] statuses, final Configuration conf) throws IOException {
    boolean result = true;
    if (statuses == null || statuses.length == 0) {
        return false;
    }
    final List<Future<Boolean>> futures = new LinkedList<>();
    final ExecutorService pool = conf.getInt(ConfVars.HIVE_MOVE_FILES_THREAD_COUNT.varname, 25) > 0
            ? Executors.newFixedThreadPool(conf.getInt(ConfVars.HIVE_MOVE_FILES_THREAD_COUNT.varname, 25),
                    new ThreadFactoryBuilder().setDaemon(true).setNameFormat("Delete-Thread-%d").build())
            : null;
    final SessionState parentSession = SessionState.get();
    for (final FileStatus status : statuses) {
        if (null == pool) {
            result &= FileUtils.moveToTrash(fs, status.getPath(), conf);
        } else {
            futures.add(pool.submit(new Callable<Boolean>() {

                @Override
                public Boolean call() throws Exception {
                    SessionState.setCurrentSessionState(parentSession);
                    return FileUtils.moveToTrash(fs, status.getPath(), conf);
                }
            }));
        }
    }
    if (null != pool) {
        pool.shutdown();
        for (Future<Boolean> future : futures) {
            try {
                result &= future.get();
            } catch (InterruptedException | ExecutionException e) {
                LOG.error("Failed to delete: ", e);
                pool.shutdownNow();
                throw new IOException(e);
            }
        }
    }
    return result;
}
Also used : SessionState(org.apache.hadoop.hive.ql.session.SessionState) FileStatus(org.apache.hadoop.fs.FileStatus) IOException(java.io.IOException) LinkedList(java.util.LinkedList) Callable(java.util.concurrent.Callable) ExecutorService(java.util.concurrent.ExecutorService) Future(java.util.concurrent.Future) ThreadFactoryBuilder(com.google.common.util.concurrent.ThreadFactoryBuilder) ExecutionException(java.util.concurrent.ExecutionException)
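
The shape worth noticing here is the optional pool: a configured thread count of zero or less disables parallelism entirely and the loop degrades to serial execution, while the parallel branch aggregates per-file success with result &= future.get(). The same shape stripped of Hive types is sketched below; processAll, processOne, and the item list are illustrative, not from Hive:

import com.google.common.util.concurrent.ThreadFactoryBuilder;

import java.util.Arrays;
import java.util.LinkedList;
import java.util.List;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;

public class OptionalPoolSketch {

    public static void main(String[] args) throws Exception {
        System.out.println(processAll(4, Arrays.asList("a", "b", "c")));
    }

    static boolean processAll(int threadCount, List<String> items) throws Exception {
        // threadCount <= 0 disables the pool, mirroring HIVE_MOVE_FILES_THREAD_COUNT above.
        final ExecutorService pool = threadCount > 0
                ? Executors.newFixedThreadPool(threadCount,
                        new ThreadFactoryBuilder().setDaemon(true).setNameFormat("Delete-Thread-%d").build())
                : null;
        boolean result = true;
        final List<Future<Boolean>> futures = new LinkedList<>();
        for (final String item : items) {
            if (pool == null) {
                result &= processOne(item); // serial fallback
            } else {
                futures.add(pool.submit(() -> processOne(item)));
            }
        }
        if (pool != null) {
            pool.shutdown();
            for (Future<Boolean> future : futures) {
                result &= future.get(); // propagate per-item success or failure
            }
        }
        return result;
    }

    static boolean processOne(String item) {
        System.out.println(Thread.currentThread().getName() + " processed " + item);
        return true;
    }
}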

Example 19 with ThreadFactoryBuilder

use of com.google.common.util.concurrent.ThreadFactoryBuilder in project hive by apache.

the class Hive method copyFiles.

private static void copyFiles(final HiveConf conf, final FileSystem destFs, FileStatus[] srcs, final FileSystem srcFs, final Path destf, final boolean isSrcLocal, final List<Path> newFiles) throws HiveException {
    final HdfsUtils.HadoopFileStatus fullDestStatus;
    try {
        fullDestStatus = new HdfsUtils.HadoopFileStatus(conf, destFs, destf);
    } catch (IOException e1) {
        throw new HiveException(e1);
    }
    if (!fullDestStatus.getFileStatus().isDirectory()) {
        throw new HiveException(destf + " is not a directory.");
    }
    final boolean inheritPerms = HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_WAREHOUSE_SUBDIR_INHERIT_PERMS);
    final List<Future<ObjectPair<Path, Path>>> futures = new LinkedList<>();
    final ExecutorService pool = conf.getInt(ConfVars.HIVE_MOVE_FILES_THREAD_COUNT.varname, 25) > 0
            ? Executors.newFixedThreadPool(conf.getInt(ConfVars.HIVE_MOVE_FILES_THREAD_COUNT.varname, 25),
                    new ThreadFactoryBuilder().setDaemon(true).setNameFormat("Move-Thread-%d").build())
            : null;
    for (FileStatus src : srcs) {
        FileStatus[] files;
        if (src.isDirectory()) {
            try {
                files = srcFs.listStatus(src.getPath(), FileUtils.HIDDEN_FILES_PATH_FILTER);
            } catch (IOException e) {
                // pool may be null when the move-files thread count is <= 0.
                if (pool != null) {
                    pool.shutdownNow();
                }
                throw new HiveException(e);
            }
        } else {
            files = new FileStatus[] { src };
        }
        final SessionState parentSession = SessionState.get();
        for (final FileStatus srcFile : files) {
            final Path srcP = srcFile.getPath();
            final boolean needToCopy = needToCopy(srcP, destf, srcFs, destFs);
            final boolean isRenameAllowed = !needToCopy && !isSrcLocal;
            // If we rename a non-local file, the original file permissions are
            // transferred from source to destination. Otherwise, when mvFile() copies
            // from source to destination, the destination's parent group ownership is inherited.
            final String srcGroup = isRenameAllowed ? srcFile.getGroup() : fullDestStatus.getFileStatus().getGroup();
            if (null == pool) {
                try {
                    Path destPath = mvFile(conf, srcFs, srcP, destFs, destf, isSrcLocal, isRenameAllowed);
                    if (null != newFiles) {
                        newFiles.add(destPath);
                    }
                } catch (IOException ioe) {
                    LOG.error("Failed to move: {}", ioe.getMessage());
                    throw new HiveException(ioe.getCause());
                }
            } else {
                futures.add(pool.submit(new Callable<ObjectPair<Path, Path>>() {

                    @Override
                    public ObjectPair<Path, Path> call() throws Exception {
                        SessionState.setCurrentSessionState(parentSession);
                        Path destPath = mvFile(conf, srcFs, srcP, destFs, destf, isSrcLocal, isRenameAllowed);
                        if (inheritPerms) {
                            HdfsUtils.setFullFileStatus(conf, fullDestStatus, srcGroup, destFs, destPath, false);
                        }
                        if (null != newFiles) {
                            newFiles.add(destPath);
                        }
                        return ObjectPair.create(srcP, destPath);
                    }
                }));
            }
        }
    }
    if (null == pool) {
        if (inheritPerms) {
            HdfsUtils.setFullFileStatus(conf, fullDestStatus, null, destFs, destf, true);
        }
    } else {
        pool.shutdown();
        for (Future<ObjectPair<Path, Path>> future : futures) {
            try {
                ObjectPair<Path, Path> pair = future.get();
                LOG.debug("Moved src: {}", pair.getFirst().toString(), ", to dest: {}", pair.getSecond().toString());
            } catch (Exception e) {
                LOG.error("Failed to move: {}", e.getMessage());
                pool.shutdownNow();
                throw new HiveException(e.getCause());
            }
        }
    }
}
Also used : Path(org.apache.hadoop.fs.Path) SessionState(org.apache.hadoop.hive.ql.session.SessionState) FileStatus(org.apache.hadoop.fs.FileStatus) IOException(java.io.IOException) LinkedList(java.util.LinkedList) Callable(java.util.concurrent.Callable) AlreadyExistsException(org.apache.hadoop.hive.metastore.api.AlreadyExistsException) InvalidOperationException(org.apache.hadoop.hive.metastore.api.InvalidOperationException) TException(org.apache.thrift.TException) IOException(java.io.IOException) ExecutionException(java.util.concurrent.ExecutionException) SerDeException(org.apache.hadoop.hive.serde2.SerDeException) NoSuchObjectException(org.apache.hadoop.hive.metastore.api.NoSuchObjectException) MetaException(org.apache.hadoop.hive.metastore.api.MetaException) HiveMetaException(org.apache.hadoop.hive.metastore.HiveMetaException) FileNotFoundException(java.io.FileNotFoundException) JDODataStoreException(javax.jdo.JDODataStoreException) ExecutorService(java.util.concurrent.ExecutorService) HdfsUtils(org.apache.hadoop.hive.io.HdfsUtils) Future(java.util.concurrent.Future) ThreadFactoryBuilder(com.google.common.util.concurrent.ThreadFactoryBuilder) ObjectPair(org.apache.hadoop.hive.common.ObjectPair)
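
One detail shared by trashFiles and copyFiles is the SessionState hand-off: the submitting thread captures its SessionState and each worker re-installs it, because the state lives in a thread-local that pool threads do not inherit. A generic sketch of that hand-off, with a plain ThreadLocal standing in for the Hive-specific SessionState:

import com.google.common.util.concurrent.ThreadFactoryBuilder;

import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;

public class ThreadLocalHandoffSketch {

    private static final ThreadLocal<String> SESSION = new ThreadLocal<>();

    public static void main(String[] args) throws Exception {
        SESSION.set("parent-session");
        // Capture on the submitting thread; pool threads start with an empty thread-local.
        final String parentSession = SESSION.get();
        ExecutorService pool = Executors.newFixedThreadPool(2,
                new ThreadFactoryBuilder().setDaemon(true).setNameFormat("Move-Thread-%d").build());
        pool.submit(() -> {
            SESSION.set(parentSession); // re-install inside the worker
            System.out.println(Thread.currentThread().getName() + " sees " + SESSION.get());
        }).get();
        pool.shutdown();
    }
}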

Example 20 with ThreadFactoryBuilder

use of com.google.common.util.concurrent.ThreadFactoryBuilder in project hive by apache.

the class StatsNoJobTask method execute.

@Override
public int execute(DriverContext driverContext) {
    LOG.info("Executing stats (no job) task");
    String tableName = "";
    ExecutorService threadPool = null;
    Hive db = getHive();
    try {
        tableName = work.getTableSpecs().tableName;
        table = db.getTable(tableName);
        int numThreads = HiveConf.getIntVar(conf, ConfVars.HIVE_STATS_GATHER_NUM_THREADS);
        tableFullName = table.getDbName() + "." + table.getTableName();
        threadPool = Executors.newFixedThreadPool(numThreads, new ThreadFactoryBuilder().setDaemon(true).setNameFormat("StatsNoJobTask-Thread-%d").build());
        partUpdates = new MapMaker().concurrencyLevel(numThreads).makeMap();
        LOG.info("Initialized threadpool for stats computation with " + numThreads + " threads");
    } catch (HiveException e) {
        LOG.error("Cannot get table " + tableName, e);
        console.printError("Cannot get table " + tableName, e.toString());
    }
    return aggregateStats(threadPool, db);
}
Also used : Hive(org.apache.hadoop.hive.ql.metadata.Hive) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) ExecutorService(java.util.concurrent.ExecutorService) MapMaker(com.google.common.collect.MapMaker) ThreadFactoryBuilder(com.google.common.util.concurrent.ThreadFactoryBuilder)
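
A pairing worth noting in this example: the pool and the MapMaker map are both sized from the same numThreads value, so the map's concurrency level matches the number of workers that will publish into it. A minimal sketch of that pairing follows, with the configuration value hard-coded and the map contents invented for illustration:

import com.google.common.collect.MapMaker;
import com.google.common.util.concurrent.ThreadFactoryBuilder;

import java.util.concurrent.ConcurrentMap;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.TimeUnit;

public class StatsPoolSketch {

    public static void main(String[] args) throws InterruptedException {
        int numThreads = 4; // in Hive this comes from HIVE_STATS_GATHER_NUM_THREADS
        ExecutorService threadPool = Executors.newFixedThreadPool(numThreads,
                new ThreadFactoryBuilder().setDaemon(true).setNameFormat("StatsNoJobTask-Thread-%d").build());
        // Sizing the map's concurrency level to the writer count reduces contention
        // when every worker publishes its partition update.
        ConcurrentMap<String, Long> partUpdates = new MapMaker().concurrencyLevel(numThreads).makeMap();
        for (int i = 0; i < numThreads; i++) {
            final String part = "part-" + i;
            threadPool.submit(() -> partUpdates.put(part, 42L));
        }
        threadPool.shutdown();
        threadPool.awaitTermination(5, TimeUnit.SECONDS);
        System.out.println(partUpdates);
    }
}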

Aggregations

ThreadFactoryBuilder (com.google.common.util.concurrent.ThreadFactoryBuilder) 124
ThreadFactory (java.util.concurrent.ThreadFactory) 38
ExecutorService (java.util.concurrent.ExecutorService) 35
IOException (java.io.IOException) 19
ThreadPoolExecutor (java.util.concurrent.ThreadPoolExecutor) 18
Future (java.util.concurrent.Future) 16
ExecutionException (java.util.concurrent.ExecutionException) 14
ArrayList (java.util.ArrayList) 10
AtomicInteger (java.util.concurrent.atomic.AtomicInteger) 10
HashMap (java.util.HashMap) 9
HashSet (java.util.HashSet) 9
Callable (java.util.concurrent.Callable) 9
ScheduledExecutorService (java.util.concurrent.ScheduledExecutorService) 9
Path (org.apache.hadoop.fs.Path) 9
Test (org.junit.Test) 9
LinkedList (java.util.LinkedList) 8
Map (java.util.Map) 8
Before (org.junit.Before) 8
LinkedBlockingQueue (java.util.concurrent.LinkedBlockingQueue) 7
ScheduledThreadPoolExecutor (java.util.concurrent.ScheduledThreadPoolExecutor) 7