
Example 41 with Future

use of java.util.concurrent.Future in project storm by apache.

the class Localizer method updateBlobs.

/**
   * This function updates blobs on the supervisor. It runs on a separate thread
   * pool, asynchronously from the download and delete operations.
   */
public List<LocalizedResource> updateBlobs(List<LocalResource> localResources, String user) throws AuthorizationException, KeyNotFoundException, IOException {
    LocalizedResourceSet lrsrcSet = _userRsrc.get(user);
    ArrayList<LocalizedResource> results = new ArrayList<>();
    ArrayList<Callable<LocalizedResource>> updates = new ArrayList<>();
    if (lrsrcSet == null) {
        // resource set must have been removed
        return results;
    }
    ClientBlobStore blobstore = null;
    try {
        blobstore = getClientBlobStore();
        for (LocalResource localResource : localResources) {
            String key = localResource.getBlobName();
            LocalizedResource lrsrc = lrsrcSet.get(key, localResource.shouldUncompress());
            if (lrsrc == null) {
                LOG.warn("blob requested for update doesn't exist: {}", key);
                continue;
            } else {
                // update it if either the version isn't the latest or if any local blob files are missing
                if (!isLocalizedResourceUpToDate(lrsrc, blobstore) || !isLocalizedResourceDownloaded(lrsrc)) {
                    LOG.debug("updating blob: {}", key);
                    updates.add(new DownloadBlob(this, _conf, key, new File(lrsrc.getFilePath()), user, lrsrc.isUncompressed(), true));
                }
            }
        }
    } finally {
        if (blobstore != null) {
            blobstore.shutdown();
        }
    }
    try {
        List<Future<LocalizedResource>> futures = _updateExecService.invokeAll(updates);
        for (Future<LocalizedResource> futureRsrc : futures) {
            try {
                LocalizedResource lrsrc = futureRsrc.get();
                // put the resource back in case the cleaner removed it at the same time
                LocalizedResourceSet newSet = new LocalizedResourceSet(user);
                LocalizedResourceSet newlrsrcSet = _userRsrc.putIfAbsent(user, newSet);
                if (newlrsrcSet == null) {
                    newlrsrcSet = newSet;
                }
                newlrsrcSet.putIfAbsent(lrsrc.getKey(), lrsrc, lrsrc.isUncompressed());
                results.add(lrsrc);
            } catch (ExecutionException e) {
                LOG.error("Error updating blob: ", e);
                if (e.getCause() instanceof AuthorizationException) {
                    throw (AuthorizationException) e.getCause();
                }
                if (e.getCause() instanceof KeyNotFoundException) {
                    throw (KeyNotFoundException) e.getCause();
                }
            }
        }
    } catch (RejectedExecutionException re) {
        LOG.error("Error updating blobs : ", re);
    } catch (InterruptedException ie) {
        throw new IOException("Interrupted Exception", ie);
    }
    return results;
}
Also used : ClientBlobStore(org.apache.storm.blobstore.ClientBlobStore) AuthorizationException(org.apache.storm.generated.AuthorizationException) ArrayList(java.util.ArrayList) IOException(java.io.IOException) Callable(java.util.concurrent.Callable) RejectedExecutionException(java.util.concurrent.RejectedExecutionException) Future(java.util.concurrent.Future) RejectedExecutionException(java.util.concurrent.RejectedExecutionException) ExecutionException(java.util.concurrent.ExecutionException) File(java.io.File) KeyNotFoundException(org.apache.storm.generated.KeyNotFoundException)
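The pattern worth calling out in this example is ExecutorService.invokeAll, which blocks until every submitted Callable has finished and returns the Futures in submission order, so each get() in the loop above returns immediately. Below is a minimal, self-contained sketch of the same pattern; the pool size, class name, and toy tasks are illustrative, not Storm's:

import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;

public class InvokeAllSketch {
    public static void main(String[] args) throws InterruptedException {
        ExecutorService pool = Executors.newFixedThreadPool(4);
        List<Callable<String>> tasks = new ArrayList<>();
        for (int i = 0; i < 8; i++) {
            final int id = i;
            // stand-in for a DownloadBlob callable
            tasks.add(() -> "blob-" + id);
        }
        try {
            // invokeAll blocks until every task has completed or failed
            List<Future<String>> futures = pool.invokeAll(tasks);
            for (Future<String> f : futures) {
                try {
                    // get() does not block here; invokeAll already waited
                    System.out.println("updated " + f.get());
                } catch (ExecutionException e) {
                    // unwrap the cause, as updateBlobs does for AuthorizationException
                    System.err.println("update failed: " + e.getCause());
                }
            }
        } finally {
            pool.shutdown();
        }
    }
}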

Example 42 with Future

use of java.util.concurrent.Future in project hive by apache.

the class StatsUtils method getFileSizeForPartitions.

/**
   * Find the bytes on disk occupied by a list of partitions
   * @param conf
   *          - hive conf
   * @param parts
   *          - partition list
   * @return sizes of partitions
   */
public static List<Long> getFileSizeForPartitions(final HiveConf conf, List<Partition> parts) {
    LOG.info("Number of partitions : " + parts.size());
    ArrayList<Future<Long>> futures = new ArrayList<>();
    int threads = Math.max(1, conf.getIntVar(ConfVars.METASTORE_FS_HANDLER_THREADS_COUNT));
    final ExecutorService pool = Executors.newFixedThreadPool(threads, new ThreadFactoryBuilder().setDaemon(true).setNameFormat("Get-Partitions-Size-%d").build());
    final ArrayList<Long> sizes = new ArrayList<>(parts.size());
    for (final Partition part : parts) {
        final Path path = part.getDataLocation();
        futures.add(pool.submit(new Callable<Long>() {

            @Override
            public Long call() throws Exception {
                try {
                    LOG.debug("Partition path : " + path);
                    FileSystem fs = path.getFileSystem(conf);
                    return fs.getContentSummary(path).getLength();
                } catch (IOException e) {
                    return 0L;
                }
            }
        }));
    }
    try {
        for (int i = 0; i < futures.size(); i++) {
            sizes.add(i, futures.get(i).get());
        }
    } catch (InterruptedException | ExecutionException e) {
        LOG.warn("Exception in processing files ", e);
    } finally {
        pool.shutdownNow();
    }
    return sizes;
}
Also used : Path(org.apache.hadoop.fs.Path) Partition(org.apache.hadoop.hive.ql.metadata.Partition) ArrayList(java.util.ArrayList) IOException(java.io.IOException) Callable(java.util.concurrent.Callable) FileSystem(org.apache.hadoop.fs.FileSystem) ExecutorService(java.util.concurrent.ExecutorService) Future(java.util.concurrent.Future) ThreadFactoryBuilder(com.google.common.util.concurrent.ThreadFactoryBuilder) ExecutionException(java.util.concurrent.ExecutionException)
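Note that in getFileSizeForPartitions a single InterruptedException or ExecutionException aborts the whole collection loop, so later partitions get no size entry at all. Below is a minimal sketch of a per-future variant that defaults a failed task to 0L instead; the fake sizes and class name are hypothetical, not Hive API:

import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;

public class PartitionSizeSketch {
    public static void main(String[] args) {
        ExecutorService pool = Executors.newFixedThreadPool(4);
        List<Future<Long>> futures = new ArrayList<>();
        for (int i = 0; i < 8; i++) {
            final long fakeSize = i * 100L; // stand-in for fs.getContentSummary(path).getLength()
            futures.add(pool.submit(() -> fakeSize));
        }
        List<Long> sizes = new ArrayList<>();
        for (Future<Long> f : futures) {
            try {
                sizes.add(f.get());
            } catch (InterruptedException e) {
                Thread.currentThread().interrupt(); // restore the flag and stop waiting
                break;
            } catch (ExecutionException e) {
                sizes.add(0L); // per-task default instead of abandoning the loop
            }
        }
        pool.shutdownNow();
        System.out.println(sizes);
    }
}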

Example 43 with Future

use of java.util.concurrent.Future in project hive by apache.

the class Utilities method getInputPaths.

/**
   * Computes a list of all input paths needed to compute the given MapWork. All aliases
   * are considered and a merged list of input paths is returned. If any input path points
   * to an empty table or partition, a dummy file in the scratch dir is created instead and
   * added to the list. This is needed to avoid special casing the operator pipeline for
   * these cases.
   *
   * @param job JobConf used to run the job
   * @param work MapWork encapsulating the info about the task
   * @param hiveScratchDir The tmp dir used to create dummy files if needed
   * @param ctx Context object
   * @return List of paths to process for the given MapWork
   * @throws Exception
   */
public static List<Path> getInputPaths(JobConf job, MapWork work, Path hiveScratchDir, Context ctx, boolean skipDummy) throws Exception {
    Set<Path> pathsProcessed = new HashSet<Path>();
    List<Path> pathsToAdd = new LinkedList<Path>();
    // AliasToWork contains all the aliases
    for (String alias : work.getAliasToWork().keySet()) {
        LOG.info("Processing alias " + alias);
        // The alias may not have any path
        boolean isEmptyTable = true;
        boolean hasLogged = false;
        // Note: this copies the list because createDummyFileForEmptyPartition may modify the map.
        for (Path file : new LinkedList<Path>(work.getPathToAliases().keySet())) {
            List<String> aliases = work.getPathToAliases().get(file);
            if (aliases.contains(alias)) {
                if (file != null) {
                    isEmptyTable = false;
                } else {
                    LOG.warn("Found a null path for alias " + alias);
                    continue;
                }
                // processed only once
                if (pathsProcessed.contains(file)) {
                    continue;
                }
                StringInternUtils.internUriStringsInPath(file);
                pathsProcessed.add(file);
                if (LOG.isDebugEnabled()) {
                    LOG.debug("Adding input file " + file);
                } else if (!hasLogged) {
                    hasLogged = true;
                    LOG.info("Adding " + work.getPathToAliases().size() + " inputs; the first input is " + file);
                }
                pathsToAdd.add(file);
            }
        }
        // If the query references non-existent partitions, we need to add an empty
        // file rather than change the operator tree. Consider:
        //   select * from (select count(1) from T union all select count(1) from T2) x;
        // If T is empty and T2 contains 1000 rows, the user expects: 0, 1000 (2 rows)
        if (isEmptyTable && !skipDummy) {
            pathsToAdd.add(createDummyFileForEmptyTable(job, work, hiveScratchDir, alias));
        }
    }
    ExecutorService pool = null;
    int numExecutors = getMaxExecutorsForInputListing(job, pathsToAdd.size());
    if (numExecutors > 1) {
        pool = Executors.newFixedThreadPool(numExecutors, new ThreadFactoryBuilder().setDaemon(true).setNameFormat("Get-Input-Paths-%d").build());
    }
    List<Path> finalPathsToAdd = new LinkedList<>();
    List<Future<Path>> futures = new LinkedList<>();
    for (final Path path : pathsToAdd) {
        if (pool == null) {
            finalPathsToAdd.add(new GetInputPathsCallable(path, job, work, hiveScratchDir, ctx, skipDummy).call());
        } else {
            futures.add(pool.submit(new GetInputPathsCallable(path, job, work, hiveScratchDir, ctx, skipDummy)));
        }
    }
    if (pool != null) {
        for (Future<Path> future : futures) {
            finalPathsToAdd.add(future.get());
        }
    }
    return finalPathsToAdd;
}
Also used : Path(org.apache.hadoop.fs.Path) LinkedList(java.util.LinkedList) ExecutorService(java.util.concurrent.ExecutorService) ThreadFactoryBuilder(com.google.common.util.concurrent.ThreadFactoryBuilder) Future(java.util.concurrent.Future) HashSet(java.util.HashSet)
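getInputPaths shows a useful degenerate-case optimization: when only one path needs listing, the Callable is invoked directly on the caller's thread and no pool is created at all. Below is a small sketch of that inline-or-pooled shape, assuming submission order must be preserved in the results; the names, threshold, and toy task are illustrative:

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;

public class InlineOrPooledSketch {
    public static void main(String[] args) throws Exception {
        List<String> inputs = Arrays.asList("a", "b", "c");
        // run inline for a single input, fan out to a pool otherwise
        ExecutorService pool = inputs.size() > 1 ? Executors.newFixedThreadPool(inputs.size()) : null;
        List<String> results = new ArrayList<>();
        List<Future<String>> futures = new ArrayList<>();
        for (final String in : inputs) {
            Callable<String> task = () -> in.toUpperCase(); // stand-in for GetInputPathsCallable
            if (pool == null) {
                results.add(task.call()); // caller's thread, no Future needed
            } else {
                futures.add(pool.submit(task));
            }
        }
        if (pool != null) {
            for (Future<String> f : futures) {
                results.add(f.get()); // preserves submission order
            }
            pool.shutdown();
        }
        System.out.println(results);
    }
}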

Example 44 with Future

use of java.util.concurrent.Future in project hive by apache.

the class Utilities method getInputSummary.

/**
   * Calculate the total size of input files.
   *
   * @param ctx
   *          the hadoop job context
   * @param work
   *          map reduce job plan
   * @param filter
   *          filter to apply to the input paths before calculating size
   * @return the summary of all the input paths.
   * @throws IOException
   */
public static ContentSummary getInputSummary(final Context ctx, MapWork work, PathFilter filter) throws IOException {
    PerfLogger perfLogger = SessionState.getPerfLogger();
    perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.INPUT_SUMMARY);
    long[] summary = { 0, 0, 0 };
    final Set<Path> pathNeedProcess = new HashSet<>();
    // Synchronizing here keeps the number of threads from growing out of control.
    synchronized (INPUT_SUMMARY_LOCK) {
        // For each input path, calculate the total size.
        for (Path path : work.getPathToAliases().keySet()) {
            Path p = path;
            if (filter != null && !filter.accept(p)) {
                continue;
            }
            ContentSummary cs = ctx.getCS(path);
            if (cs == null) {
                if (path == null) {
                    continue;
                }
                pathNeedProcess.add(path);
            } else {
                summary[0] += cs.getLength();
                summary[1] += cs.getFileCount();
                summary[2] += cs.getDirectoryCount();
            }
        }
        // Process the paths for which a name node call is needed
        final Map<String, ContentSummary> resultMap = new ConcurrentHashMap<String, ContentSummary>();
        ArrayList<Future<?>> results = new ArrayList<Future<?>>();
        final ExecutorService executor;
        int numExecutors = getMaxExecutorsForInputListing(ctx.getConf(), pathNeedProcess.size());
        if (numExecutors > 1) {
            LOG.info("Using " + numExecutors + " threads for getContentSummary");
            executor = Executors.newFixedThreadPool(numExecutors, new ThreadFactoryBuilder().setDaemon(true).setNameFormat("Get-Input-Summary-%d").build());
        } else {
            executor = null;
        }
        HiveInterruptCallback interrup = HiveInterruptUtils.add(new HiveInterruptCallback() {

            @Override
            public void interrupt() {
                for (Path path : pathNeedProcess) {
                    try {
                        path.getFileSystem(ctx.getConf()).close();
                    } catch (IOException ignore) {
                        LOG.debug("Failed to close filesystem", ignore);
                    }
                }
                if (executor != null) {
                    executor.shutdownNow();
                }
            }
        });
        try {
            Configuration conf = ctx.getConf();
            JobConf jobConf = new JobConf(conf);
            for (Path path : pathNeedProcess) {
                final Path p = path;
                final String pathStr = path.toString();
                // All threads share the same Configuration and JobConf, on the
                // assumption that both are thread safe as long as only read
                // operations are performed. Hadoop's javadoc does not state this,
                // but the source code clearly shows an effort to make it so, and
                // we believe the assumption holds. We will revisit this code if
                // it turns out to be incorrect.
                final Configuration myConf = conf;
                final JobConf myJobConf = jobConf;
                final Map<String, Operator<?>> aliasToWork = work.getAliasToWork();
                final Map<Path, ArrayList<String>> pathToAlias = work.getPathToAliases();
                final PartitionDesc partDesc = work.getPathToPartitionInfo().get(p);
                Runnable r = new Runnable() {

                    @Override
                    public void run() {
                        try {
                            Class<? extends InputFormat> inputFormatCls = partDesc.getInputFileFormatClass();
                            InputFormat inputFormatObj = HiveInputFormat.getInputFormatFromCache(inputFormatCls, myJobConf);
                            if (inputFormatObj instanceof ContentSummaryInputFormat) {
                                ContentSummaryInputFormat cs = (ContentSummaryInputFormat) inputFormatObj;
                                resultMap.put(pathStr, cs.getContentSummary(p, myJobConf));
                                return;
                            }
                            String metaTableStorage = null;
                            if (partDesc.getTableDesc() != null && partDesc.getTableDesc().getProperties() != null) {
                                metaTableStorage = partDesc.getTableDesc().getProperties().getProperty(hive_metastoreConstants.META_TABLE_STORAGE, null);
                            }
                            if (partDesc.getProperties() != null) {
                                metaTableStorage = partDesc.getProperties().getProperty(hive_metastoreConstants.META_TABLE_STORAGE, metaTableStorage);
                            }
                            HiveStorageHandler handler = HiveUtils.getStorageHandler(myConf, metaTableStorage);
                            if (handler instanceof InputEstimator) {
                                long total = 0;
                                TableDesc tableDesc = partDesc.getTableDesc();
                                InputEstimator estimator = (InputEstimator) handler;
                                for (String alias : HiveFileFormatUtils.doGetAliasesFromPath(pathToAlias, p)) {
                                    JobConf jobConf = new JobConf(myJobConf);
                                    TableScanOperator scanOp = (TableScanOperator) aliasToWork.get(alias);
                                    Utilities.setColumnNameList(jobConf, scanOp, true);
                                    Utilities.setColumnTypeList(jobConf, scanOp, true);
                                    PlanUtils.configureInputJobPropertiesForStorageHandler(tableDesc);
                                    Utilities.copyTableJobPropertiesToConf(tableDesc, jobConf);
                                    total += estimator.estimate(jobConf, scanOp, -1).getTotalLength();
                                }
                                resultMap.put(pathStr, new ContentSummary(total, -1, -1));
                            } else {
                                // TODO: should nullify the summary for non-native tables
                                // so they are not selected as a mapjoin target
                                FileSystem fs = p.getFileSystem(myConf);
                                resultMap.put(pathStr, fs.getContentSummary(p));
                            }
                        } catch (Exception e) {
                            // We safely ignore this exception for summary data.
                            // We don't update the cache to protect it from polluting other
                            // usages. The worst case is that IOException will always be
                            // retried for another getInputSummary(), which is fine as
                            // IOException is not considered as a common case.
                            LOG.info("Cannot get size of " + pathStr + ". Safely ignored.");
                        }
                    }
                };
                if (executor == null) {
                    r.run();
                } else {
                    Future<?> result = executor.submit(r);
                    results.add(result);
                }
            }
            if (executor != null) {
                for (Future<?> result : results) {
                    boolean executorDone = false;
                    do {
                        try {
                            result.get();
                            executorDone = true;
                        } catch (InterruptedException e) {
                            LOG.info("Interrupted when waiting threads: ", e);
                            Thread.currentThread().interrupt();
                            break;
                        } catch (ExecutionException e) {
                            throw new IOException(e);
                        }
                    } while (!executorDone);
                }
                executor.shutdown();
            }
            HiveInterruptUtils.checkInterrupted();
            for (Map.Entry<String, ContentSummary> entry : resultMap.entrySet()) {
                ContentSummary cs = entry.getValue();
                summary[0] += cs.getLength();
                summary[1] += cs.getFileCount();
                summary[2] += cs.getDirectoryCount();
                ctx.addCS(entry.getKey(), cs);
                LOG.info("Cache Content Summary for " + entry.getKey() + " length: " + cs.getLength() + " file count: " + cs.getFileCount() + " directory count: " + cs.getDirectoryCount());
            }
            perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.INPUT_SUMMARY);
            return new ContentSummary(summary[0], summary[1], summary[2]);
        } finally {
            HiveInterruptUtils.remove(interrup);
        }
    }
}
Also used : Configuration(org.apache.hadoop.conf.Configuration) PerfLogger(org.apache.hadoop.hive.ql.log.PerfLogger) ArrayList(java.util.ArrayList) ContentSummaryInputFormat(org.apache.hadoop.hive.ql.io.ContentSummaryInputFormat) FileSystem(org.apache.hadoop.fs.FileSystem) ThreadFactoryBuilder(com.google.common.util.concurrent.ThreadFactoryBuilder) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) ExecutionException(java.util.concurrent.ExecutionException) JobConf(org.apache.hadoop.mapred.JobConf) HashSet(java.util.HashSet) Path(org.apache.hadoop.fs.Path) InputEstimator(org.apache.hadoop.hive.ql.metadata.InputEstimator) HiveStorageHandler(org.apache.hadoop.hive.ql.metadata.HiveStorageHandler) HiveInterruptCallback(org.apache.hadoop.hive.common.HiveInterruptCallback) IOException(java.io.IOException) SQLFeatureNotSupportedException(java.sql.SQLFeatureNotSupportedException) SQLTransientException(java.sql.SQLTransientException) SQLException(java.sql.SQLException) IOException(java.io.IOException) ExecutionException(java.util.concurrent.ExecutionException) SerDeException(org.apache.hadoop.hive.serde2.SerDeException) SemanticException(org.apache.hadoop.hive.ql.parse.SemanticException) EOFException(java.io.EOFException) FileNotFoundException(java.io.FileNotFoundException) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) SequenceFileInputFormat(org.apache.hadoop.mapred.SequenceFileInputFormat) ReworkMapredInputFormat(org.apache.hadoop.hive.ql.io.ReworkMapredInputFormat) ContentSummaryInputFormat(org.apache.hadoop.hive.ql.io.ContentSummaryInputFormat) InputFormat(org.apache.hadoop.mapred.InputFormat) FileInputFormat(org.apache.hadoop.mapred.FileInputFormat) OneNullRowInputFormat(org.apache.hadoop.hive.ql.io.OneNullRowInputFormat) HiveInputFormat(org.apache.hadoop.hive.ql.io.HiveInputFormat) ContentSummary(org.apache.hadoop.fs.ContentSummary) ExecutorService(java.util.concurrent.ExecutorService) Future(java.util.concurrent.Future) PartitionDesc(org.apache.hadoop.hive.ql.plan.PartitionDesc) TableDesc(org.apache.hadoop.hive.ql.plan.TableDesc) Map(java.util.Map) LinkedHashMap(java.util.LinkedHashMap) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) HashMap(java.util.HashMap)
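A side note on the do/while in the waiting loop above: every branch either sets executorDone, breaks, or throws, so the body can only ever run once, and the logic reduces to a single guarded get() per Future. Below is a minimal sketch of that wait-and-translate pattern, restoring the interrupt flag before converting to IOException; the class and method names are made up for illustration:

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;

public class AwaitAllSketch {
    static void awaitAll(List<Future<?>> results) throws IOException {
        for (Future<?> r : results) {
            try {
                r.get(); // block until this task finishes
            } catch (InterruptedException e) {
                Thread.currentThread().interrupt(); // restore the flag for callers
                throw new IOException("interrupted while waiting", e);
            } catch (ExecutionException e) {
                throw new IOException(e); // surface the task's failure
            }
        }
    }

    public static void main(String[] args) throws IOException {
        ExecutorService executor = Executors.newFixedThreadPool(2);
        List<Future<?>> results = new ArrayList<>();
        for (int i = 0; i < 4; i++) {
            results.add(executor.submit(() -> { /* stand-in for getContentSummary */ }));
        }
        try {
            awaitAll(results);
        } finally {
            executor.shutdown();
        }
    }
}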

Example 45 with Future

use of java.util.concurrent.Future in project hive by apache.

the class Hive method trashFiles.

/**
   * Trashes or deletes all files under a directory. Leaves the directory as is.
   * @param fs FileSystem to use
   * @param statuses fileStatuses of files to be deleted
   * @param conf hive configuration
   * @return true if deletion successful
   * @throws IOException
   */
public static boolean trashFiles(final FileSystem fs, final FileStatus[] statuses, final Configuration conf) throws IOException {
    boolean result = true;
    if (statuses == null || statuses.length == 0) {
        return false;
    }
    final List<Future<Boolean>> futures = new LinkedList<>();
    final ExecutorService pool = conf.getInt(ConfVars.HIVE_MOVE_FILES_THREAD_COUNT.varname, 25) > 0
        ? Executors.newFixedThreadPool(conf.getInt(ConfVars.HIVE_MOVE_FILES_THREAD_COUNT.varname, 25),
            new ThreadFactoryBuilder().setDaemon(true).setNameFormat("Delete-Thread-%d").build())
        : null;
    final SessionState parentSession = SessionState.get();
    for (final FileStatus status : statuses) {
        if (null == pool) {
            result &= FileUtils.moveToTrash(fs, status.getPath(), conf);
        } else {
            futures.add(pool.submit(new Callable<Boolean>() {

                @Override
                public Boolean call() throws Exception {
                    SessionState.setCurrentSessionState(parentSession);
                    return FileUtils.moveToTrash(fs, status.getPath(), conf);
                }
            }));
        }
    }
    if (null != pool) {
        pool.shutdown();
        for (Future<Boolean> future : futures) {
            try {
                result &= future.get();
            } catch (InterruptedException | ExecutionException e) {
                LOG.error("Failed to delete: ", e);
                pool.shutdownNow();
                throw new IOException(e);
            }
        }
    }
    return result;
}
Also used : SessionState(org.apache.hadoop.hive.ql.session.SessionState) FileStatus(org.apache.hadoop.fs.FileStatus) IOException(java.io.IOException) LinkedList(java.util.LinkedList) Callable(java.util.concurrent.Callable) ExecutorService(java.util.concurrent.ExecutorService) Future(java.util.concurrent.Future) ThreadFactoryBuilder(com.google.common.util.concurrent.ThreadFactoryBuilder) ExecutionException(java.util.concurrent.ExecutionException)
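trashFiles demonstrates the shutdown-then-drain idiom: shutdown() stops new submissions but lets queued tasks finish, while shutdownNow() is reserved for the failure path. Below is a minimal sketch of the same idiom with the per-file outcomes ANDed together; the even/odd task is a placeholder, not real trash behavior:

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;

public class AllSucceededSketch {
    public static void main(String[] args) throws IOException {
        ExecutorService pool = Executors.newFixedThreadPool(4);
        List<Future<Boolean>> futures = new ArrayList<>();
        for (int i = 0; i < 8; i++) {
            final int id = i;
            futures.add(pool.submit(() -> id % 2 == 0)); // placeholder for moveToTrash
        }
        pool.shutdown(); // stop accepting new tasks; queued ones still run
        boolean result = true;
        for (Future<Boolean> f : futures) {
            try {
                result &= f.get(); // AND together the per-file outcomes
            } catch (InterruptedException | ExecutionException e) {
                if (e instanceof InterruptedException) {
                    Thread.currentThread().interrupt(); // restore the flag before rethrowing
                }
                pool.shutdownNow(); // cancel whatever is still queued or running
                throw new IOException(e);
            }
        }
        System.out.println("all succeeded: " + result);
    }
}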

Aggregations

Future (java.util.concurrent.Future)1138 ArrayList (java.util.ArrayList)479 ExecutorService (java.util.concurrent.ExecutorService)445 Test (org.junit.Test)413 ExecutionException (java.util.concurrent.ExecutionException)264 Callable (java.util.concurrent.Callable)206 IOException (java.io.IOException)177 ParallelTest (com.hazelcast.test.annotation.ParallelTest)148 QuickTest (com.hazelcast.test.annotation.QuickTest)148 HashMap (java.util.HashMap)92 List (java.util.List)84 CountDownLatch (java.util.concurrent.CountDownLatch)71 LinkedList (java.util.LinkedList)67 TimeoutException (java.util.concurrent.TimeoutException)63 HashSet (java.util.HashSet)62 AtomicInteger (java.util.concurrent.atomic.AtomicInteger)59 Map (java.util.Map)58 ICompletableFuture (com.hazelcast.core.ICompletableFuture)57 AtomicBoolean (java.util.concurrent.atomic.AtomicBoolean)53 File (java.io.File)46