Search in sources :

Example 61 with ThreadFactoryBuilder

use of com.google.common.util.concurrent.ThreadFactoryBuilder in project hive by apache.

Class MetaStoreUtils, method aggrPartitionStats.

/**
 * Runs every aggregator in {@code colStatsMap} on a fixed pool of daemon threads and collects
 * the non-null results.
 *
 * @param colStatsMap maps each aggregator to the per-partition column stats it should combine
 * @param partNames partition names, passed through to every aggregator
 * @param areAllPartsFound passed through to every aggregator
 * @param useDensityFunctionForNDVEstimation not read by this method
 * @param ndvTuner not read by this method
 * @return the non-null aggregation results in the iteration order of {@code colStatsMap};
 *         an empty list when {@code colStatsMap} is empty
 * @throws MetaException if an aggregation task fails or the calling thread is interrupted
 *         while waiting for a result
 */
public static List<ColumnStatisticsObj> aggrPartitionStats(Map<ColumnStatsAggregator, List<ColStatsObjWithSourceInfo>> colStatsMap, final List<String> partNames, final boolean areAllPartsFound, final boolean useDensityFunctionForNDVEstimation, final double ndvTuner) throws MetaException {
    List<ColumnStatisticsObj> aggrColStatObjs = new ArrayList<ColumnStatisticsObj>();
    if (colStatsMap.isEmpty()) {
        // Executors.newFixedThreadPool rejects a pool size of 0 with IllegalArgumentException,
        // so there is nothing to schedule and nothing to aggregate.
        return aggrColStatObjs;
    }
    final int numThreads = Math.min(colStatsMap.size(), Runtime.getRuntime().availableProcessors());
    final ExecutorService pool = Executors.newFixedThreadPool(numThreads, new ThreadFactoryBuilder().setDaemon(true).setNameFormat("aggr-col-stats-%d").build());
    final List<Future<ColumnStatisticsObj>> futures = new ArrayList<>(colStatsMap.size());
    LOG.debug("Aggregating column stats. Threads used: {}", numThreads);
    long start = System.currentTimeMillis();
    for (final Entry<ColumnStatsAggregator, List<ColStatsObjWithSourceInfo>> entry : colStatsMap.entrySet()) {
        futures.add(pool.submit(new Callable<ColumnStatisticsObj>() {

            @Override
            public ColumnStatisticsObj call() throws MetaException {
                List<ColStatsObjWithSourceInfo> colStatWithSourceInfo = entry.getValue();
                ColumnStatsAggregator aggregator = entry.getKey();
                try {
                    return aggregator.aggregate(colStatWithSourceInfo, partNames, areAllPartsFound);
                } catch (MetaException e) {
                    LOG.debug(e.getMessage());
                    throw e;
                }
            }
        }));
    }
    // No further tasks will be submitted; already-queued tasks still run to completion.
    pool.shutdown();
    for (Future<ColumnStatisticsObj> future : futures) {
        try {
            ColumnStatisticsObj statsObj = future.get();
            if (statsObj != null) {
                aggrColStatObjs.add(statsObj);
            }
        } catch (InterruptedException | ExecutionException e) {
            LOG.debug(e.getMessage());
            pool.shutdownNow();
            if (e instanceof InterruptedException) {
                // Preserve the interrupt for callers further up the stack.
                Thread.currentThread().interrupt();
            }
            MetaException wrapped = new MetaException(e.toString());
            wrapped.initCause(e);
            throw wrapped;
        }
    }
    LOG.debug("Time for aggr col stats in seconds: {} Threads used: {}", ((System.currentTimeMillis() - (double) start)) / 1000, numThreads);
    return aggrColStatObjs;
}
Also used : ArrayList(java.util.ArrayList) Callable(java.util.concurrent.Callable) ColumnStatisticsObj(org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj) ColumnStatsAggregator(org.apache.hadoop.hive.metastore.columnstats.aggr.ColumnStatsAggregator) ExecutorService(java.util.concurrent.ExecutorService) ThreadFactoryBuilder(com.google.common.util.concurrent.ThreadFactoryBuilder) Future(java.util.concurrent.Future) MachineList(org.apache.hadoop.util.MachineList) List(java.util.List) ArrayList(java.util.ArrayList) ExecutionException(java.util.concurrent.ExecutionException) MetaException(org.apache.hadoop.hive.metastore.api.MetaException)

Example 62 with ThreadFactoryBuilder

use of com.google.common.util.concurrent.ThreadFactoryBuilder in project hive by apache.

Class Hive, method copyFiles.

/**
 * Moves every source file into the destination directory via {@code mvFile}, either inline or
 * on a pool of daemon threads.
 *
 * <p>Entries of {@code srcs} that are directories are expanded one level using
 * {@code FileUtils.HIDDEN_FILES_PATH_FILTER}. When the configured
 * {@code HIVE_MOVE_FILES_THREAD_COUNT} (default 25) is not positive, no pool is created and
 * files are moved sequentially on the calling thread.
 *
 * @param conf configuration; supplies the thread count and is passed to {@code mvFile}
 * @param destFs file system of the destination
 * @param srcs source files and/or directories; sorted in place by this method
 * @param srcFs file system of the sources
 * @param destf destination path; must be a directory
 * @param isSrcLocal passed to {@code mvFile}; when true, rename is not allowed
 * @param isOverwrite passed to {@code mvFile}
 * @param newFiles if non-null, receives the destination path of every moved file.
 *        NOTE(review): in pooled mode this list is appended to from worker threads;
 *        presumably callers pass a thread-safe list - confirm.
 * @param acidRename when true each file is given an incrementing task id, otherwise -1
 * @throws HiveException if the destination is not a directory, listing a source directory
 *         fails, or any move fails
 */
private static void copyFiles(final HiveConf conf, final FileSystem destFs, FileStatus[] srcs, final FileSystem srcFs, final Path destf, final boolean isSrcLocal, boolean isOverwrite, final List<Path> newFiles, boolean acidRename) throws HiveException {
    final HdfsUtils.HadoopFileStatus fullDestStatus;
    try {
        fullDestStatus = new HdfsUtils.HadoopFileStatus(conf, destFs, destf);
    } catch (IOException e1) {
        throw new HiveException(e1);
    }
    if (!fullDestStatus.getFileStatus().isDirectory()) {
        throw new HiveException(destf + " is not a directory.");
    }
    final List<Future<ObjectPair<Path, Path>>> futures = new LinkedList<>();
    // Read the setting once so the "use a pool?" decision and the pool size cannot disagree.
    final int threadCount = conf.getInt(ConfVars.HIVE_MOVE_FILES_THREAD_COUNT.varname, 25);
    final ExecutorService pool = threadCount > 0 ? Executors.newFixedThreadPool(threadCount, new ThreadFactoryBuilder().setDaemon(true).setNameFormat("Move-Thread-%d").build()) : null;
    // For ACID non-bucketed case, the filenames have to be in the format consistent with INSERT/UPDATE/DELETE Ops,
    // i.e, like 000000_0, 000001_0_copy_1, 000002_0.gz etc.
    // The extension is only maintained for files which are compressed.
    int taskId = 0;
    // Sort the files
    Arrays.sort(srcs);
    for (FileStatus src : srcs) {
        FileStatus[] files;
        if (src.isDirectory()) {
            try {
                files = srcFs.listStatus(src.getPath(), FileUtils.HIDDEN_FILES_PATH_FILTER);
            } catch (IOException e) {
                // pool is null in sequential mode; guard so the real failure is not masked by an NPE.
                if (null != pool) {
                    pool.shutdownNow();
                }
                throw new HiveException(e);
            }
        } else {
            files = new FileStatus[] { src };
        }
        final SessionState parentSession = SessionState.get();
        // Sort the files
        Arrays.sort(files);
        for (final FileStatus srcFile : files) {
            final Path srcP = srcFile.getPath();
            final boolean needToCopy = needToCopy(srcP, destf, srcFs, destFs);
            final boolean isRenameAllowed = !needToCopy && !isSrcLocal;
            final String msg = "Unable to move source " + srcP + " to destination " + destf;
            // copy from source to destination, we will inherit the destination's parent group ownership.
            if (null == pool) {
                try {
                    Path destPath = mvFile(conf, srcFs, srcP, destFs, destf, isSrcLocal, isOverwrite, isRenameAllowed, acidRename ? taskId++ : -1);
                    if (null != newFiles) {
                        newFiles.add(destPath);
                    }
                } catch (Exception e) {
                    throw getHiveException(e, msg, "Failed to move: {}");
                }
            } else {
                // future only takes final or seemingly final values. Make a final copy of taskId
                final int finalTaskId = acidRename ? taskId++ : -1;
                futures.add(pool.submit(new Callable<ObjectPair<Path, Path>>() {

                    @Override
                    public ObjectPair<Path, Path> call() throws HiveException {
                        // Worker threads adopt the submitting thread's session before moving.
                        SessionState.setCurrentSessionState(parentSession);
                        try {
                            Path destPath = mvFile(conf, srcFs, srcP, destFs, destf, isSrcLocal, isOverwrite, isRenameAllowed, finalTaskId);
                            if (null != newFiles) {
                                newFiles.add(destPath);
                            }
                            return ObjectPair.create(srcP, destPath);
                        } catch (Exception e) {
                            throw getHiveException(e, msg);
                        }
                    }
                }));
            }
        }
    }
    if (null != pool) {
        // Stop accepting work, then wait for each submitted move in submission order.
        pool.shutdown();
        for (Future<ObjectPair<Path, Path>> future : futures) {
            try {
                ObjectPair<Path, Path> pair = future.get();
                LOG.debug("Moved src: {}, to dest: {}", pair.getFirst().toString(), pair.getSecond().toString());
            } catch (Exception e) {
                throw handlePoolException(pool, e);
            }
        }
    }
}
Also used : Path(org.apache.hadoop.fs.Path) SessionState(org.apache.hadoop.hive.ql.session.SessionState) FileStatus(org.apache.hadoop.fs.FileStatus) IOException(java.io.IOException) LinkedList(java.util.LinkedList) SQLUniqueConstraint(org.apache.hadoop.hive.metastore.api.SQLUniqueConstraint) SQLCheckConstraint(org.apache.hadoop.hive.metastore.api.SQLCheckConstraint) SQLNotNullConstraint(org.apache.hadoop.hive.metastore.api.SQLNotNullConstraint) SQLDefaultConstraint(org.apache.hadoop.hive.metastore.api.SQLDefaultConstraint) AlreadyExistsException(org.apache.hadoop.hive.metastore.api.AlreadyExistsException) InvalidOperationException(org.apache.hadoop.hive.metastore.api.InvalidOperationException) TException(org.apache.thrift.TException) IOException(java.io.IOException) ExecutionException(java.util.concurrent.ExecutionException) SerDeException(org.apache.hadoop.hive.serde2.SerDeException) NoSuchObjectException(org.apache.hadoop.hive.metastore.api.NoSuchObjectException) MetaException(org.apache.hadoop.hive.metastore.api.MetaException) HiveMetaException(org.apache.hadoop.hive.metastore.HiveMetaException) FileNotFoundException(java.io.FileNotFoundException) JDODataStoreException(javax.jdo.JDODataStoreException) Callable(java.util.concurrent.Callable) ExecutorService(java.util.concurrent.ExecutorService) HdfsUtils(org.apache.hadoop.hive.io.HdfsUtils) Future(java.util.concurrent.Future) ThreadFactoryBuilder(com.google.common.util.concurrent.ThreadFactoryBuilder) ObjectPair(org.apache.hadoop.hive.common.ObjectPair)

Example 63 with ThreadFactoryBuilder

use of com.google.common.util.concurrent.ThreadFactoryBuilder in project hive by apache.

Class Hive, method trashFiles.

/**
 * Passes each given file status to {@code FileUtils.moveToTrash}, either sequentially or on a
 * pool of daemon threads sized by {@code HIVE_MOVE_FILES_THREAD_COUNT} (default 25; a
 * non-positive value disables the pool).
 *
 * @param fs FileSystem to use
 * @param statuses fileStatuses of files to be deleted
 * @param conf hive configuration
 * @param purge passed through to {@code FileUtils.moveToTrash}
 * @return false if {@code statuses} is null or empty; otherwise true only if every
 *         {@code FileUtils.moveToTrash} call returned true
 * @throws IOException if a pooled task fails or the calling thread is interrupted while
 *         waiting (the interrupt flag is restored before throwing), or if a sequential
 *         {@code moveToTrash} call throws
 */
public static boolean trashFiles(final FileSystem fs, final FileStatus[] statuses, final Configuration conf, final boolean purge) throws IOException {
    boolean result = true;
    if (statuses == null || statuses.length == 0) {
        return false;
    }
    final List<Future<Boolean>> futures = new LinkedList<>();
    // Read the setting once so the "use a pool?" decision and the pool size cannot disagree.
    final int threadCount = conf.getInt(ConfVars.HIVE_MOVE_FILES_THREAD_COUNT.varname, 25);
    final ExecutorService pool = threadCount > 0 ? Executors.newFixedThreadPool(threadCount, new ThreadFactoryBuilder().setDaemon(true).setNameFormat("Delete-Thread-%d").build()) : null;
    final SessionState parentSession = SessionState.get();
    for (final FileStatus status : statuses) {
        if (null == pool) {
            result &= FileUtils.moveToTrash(fs, status.getPath(), conf, purge);
        } else {
            futures.add(pool.submit(new Callable<Boolean>() {

                @Override
                public Boolean call() throws Exception {
                    // Worker threads adopt the submitting thread's session before deleting.
                    SessionState.setCurrentSessionState(parentSession);
                    return FileUtils.moveToTrash(fs, status.getPath(), conf, purge);
                }
            }));
        }
    }
    if (null != pool) {
        pool.shutdown();
        for (Future<Boolean> future : futures) {
            try {
                result &= future.get();
            } catch (InterruptedException | ExecutionException e) {
                LOG.error("Failed to delete: ", e);
                pool.shutdownNow();
                if (e instanceof InterruptedException) {
                    // Wrapping in IOException would otherwise hide the interrupt from callers.
                    Thread.currentThread().interrupt();
                }
                throw new IOException(e);
            }
        }
    }
    return result;
}
Also used : SessionState(org.apache.hadoop.hive.ql.session.SessionState) FileStatus(org.apache.hadoop.fs.FileStatus) IOException(java.io.IOException) LinkedList(java.util.LinkedList) Callable(java.util.concurrent.Callable) ExecutorService(java.util.concurrent.ExecutorService) Future(java.util.concurrent.Future) ThreadFactoryBuilder(com.google.common.util.concurrent.ThreadFactoryBuilder) ExecutionException(java.util.concurrent.ExecutionException)

Example 64 with ThreadFactoryBuilder

use of com.google.common.util.concurrent.ThreadFactoryBuilder in project hive by apache.

Class BasicStatsTask, method buildBasicStatsExecutor.

/**
 * Creates the fixed-size pool of daemon threads ("stats-updater-thread-N") used to fetch file
 * status for all partitions up-front.
 *
 * @return a new executor with at least one thread
 */
private ExecutorService buildBasicStatsExecutor() {
    // Sized from the move-files thread count (default 1); a setting of 0 or less falls back
    // to a single thread.
    final int configuredThreads = conf.getInt(ConfVars.HIVE_MOVE_FILES_THREAD_COUNT.varname, 1);
    final int poolSize = configuredThreads < 1 ? 1 : configuredThreads;
    final ThreadFactoryBuilder threadNaming = new ThreadFactoryBuilder().setDaemon(true).setNameFormat("stats-updater-thread-%d");
    final ExecutorService statsExecutor = Executors.newFixedThreadPool(poolSize, threadNaming.build());
    LOG.debug("Getting file stats of all partitions. threadpool size:" + poolSize);
    return statsExecutor;
}
Also used : ExecutorService(java.util.concurrent.ExecutorService) ThreadFactoryBuilder(com.google.common.util.concurrent.ThreadFactoryBuilder)

Example 65 with ThreadFactoryBuilder

use of com.google.common.util.concurrent.ThreadFactoryBuilder in project hive by apache.

Class StatsUtils, method getFileSizeForPartitions.

/**
 * Find the bytes on disk occupied by a list of partitions, querying the file system for each
 * partition's data location on a pool of daemon threads.
 *
 * <p>A partition whose lookup fails with an {@link IOException} contributes a size of 0. If
 * waiting for a result is interrupted or a task fails with any other exception, the failure
 * is logged and the sizes collected so far are returned, so the result may be shorter than
 * {@code parts}.
 *
 * @param conf
 *          - hive conf; supplies the thread count (at least 1) and is used to resolve each
 *          partition's FileSystem
 * @param parts
 *          - partition list
 * @return sizes of partitions, in the order of {@code parts}
 */
public static List<Long> getFileSizeForPartitions(final HiveConf conf, List<Partition> parts) {
    LOG.info("Number of partitions : " + parts.size());
    List<Future<Long>> futures = new ArrayList<>(parts.size());
    int threads = Math.max(1, conf.getIntVar(ConfVars.METASTORE_FS_HANDLER_THREADS_COUNT));
    final ExecutorService pool = Executors.newFixedThreadPool(threads, new ThreadFactoryBuilder().setDaemon(true).setNameFormat("Get-Partitions-Size-%d").build());
    final ArrayList<Long> sizes = new ArrayList<>(parts.size());
    for (final Partition part : parts) {
        final Path path = part.getDataLocation();
        futures.add(pool.submit(new Callable<Long>() {

            @Override
            public Long call() throws Exception {
                try {
                    LOG.debug("Partition path : " + path);
                    FileSystem fs = path.getFileSystem(conf);
                    return fs.getContentSummary(path).getLength();
                } catch (IOException e) {
                    // Best effort: an unreadable partition counts as empty, but leave a trace.
                    LOG.debug("Unable to get size of partition path : " + path, e);
                    return 0L;
                }
            }
        }));
    }
    try {
        for (Future<Long> future : futures) {
            sizes.add(future.get());
        }
    } catch (InterruptedException | ExecutionException e) {
        LOG.warn("Exception in processing files ", e);
        if (e instanceof InterruptedException) {
            // The exception is not rethrown, so restore the flag for callers to observe.
            Thread.currentThread().interrupt();
        }
    } finally {
        pool.shutdownNow();
    }
    return sizes;
}
Also used : Path(org.apache.hadoop.fs.Path) Partition(org.apache.hadoop.hive.ql.metadata.Partition) ArrayList(java.util.ArrayList) IOException(java.io.IOException) Callable(java.util.concurrent.Callable) FileSystem(org.apache.hadoop.fs.FileSystem) ExecutorService(java.util.concurrent.ExecutorService) Future(java.util.concurrent.Future) ThreadFactoryBuilder(com.google.common.util.concurrent.ThreadFactoryBuilder) ExecutionException(java.util.concurrent.ExecutionException)

Aggregations

ThreadFactoryBuilder (com.google.common.util.concurrent.ThreadFactoryBuilder)143 ExecutorService (java.util.concurrent.ExecutorService)49 ThreadFactory (java.util.concurrent.ThreadFactory)46 IOException (java.io.IOException)23 Future (java.util.concurrent.Future)19 ThreadPoolExecutor (java.util.concurrent.ThreadPoolExecutor)19 ExecutionException (java.util.concurrent.ExecutionException)17 ArrayList (java.util.ArrayList)15 Callable (java.util.concurrent.Callable)12 AtomicInteger (java.util.concurrent.atomic.AtomicInteger)12 HashMap (java.util.HashMap)11 Path (org.apache.hadoop.fs.Path)11 LinkedList (java.util.LinkedList)10 Map (java.util.Map)10 HashSet (java.util.HashSet)9 ConcurrentHashMap (java.util.concurrent.ConcurrentHashMap)9 ScheduledExecutorService (java.util.concurrent.ScheduledExecutorService)9 Test (org.junit.Test)9 LinkedBlockingQueue (java.util.concurrent.LinkedBlockingQueue)8 Before (org.junit.Before)8