Use of com.google.common.util.concurrent.ThreadFactoryBuilder in project Hive by Apache.
Class MetaStoreUtils, method aggrPartitionStats.
/**
 * Aggregates column statistics in parallel, producing at most one result per map entry.
 *
 * <p>Each entry of {@code colStatsMap} is handed to its own task, which calls
 * {@code aggregator.aggregate(colStats, partNames, areAllPartsFound)} on a daemon thread
 * named {@code aggr-col-stats-N}. The pool size is the smaller of the number of entries
 * and the number of available processors. {@code null} aggregation results are skipped.
 *
 * @param colStatsMap aggregator to the per-source column statistics it should aggregate;
 *        a {@code null} or empty map yields an empty result
 * @param partNames partition names forwarded to every aggregator
 * @param areAllPartsFound flag forwarded to every aggregator
 * @param useDensityFunctionForNDVEstimation not read by this method
 * @param ndvTuner not read by this method
 * @return the non-null aggregated statistics, in the iteration order of {@code colStatsMap}
 * @throws MetaException if any aggregation task fails or the calling thread is interrupted
 *         while waiting; the message is the {@code toString()} of the underlying exception
 */
public static List<ColumnStatisticsObj> aggrPartitionStats(Map<ColumnStatsAggregator, List<ColStatsObjWithSourceInfo>> colStatsMap, final List<String> partNames, final boolean areAllPartsFound, final boolean useDensityFunctionForNDVEstimation, final double ndvTuner) throws MetaException {
  List<ColumnStatisticsObj> aggrColStatObjs = new ArrayList<ColumnStatisticsObj>();
  if (colStatsMap == null || colStatsMap.isEmpty()) {
    // Executors.newFixedThreadPool rejects a pool size of 0 with IllegalArgumentException,
    // so there is nothing to aggregate and no pool to build.
    return aggrColStatObjs;
  }
  final int numThreads = Math.min(colStatsMap.size(), Runtime.getRuntime().availableProcessors());
  final ExecutorService pool = Executors.newFixedThreadPool(numThreads, new ThreadFactoryBuilder().setDaemon(true).setNameFormat("aggr-col-stats-%d").build());
  final List<Future<ColumnStatisticsObj>> futures = Lists.newLinkedList();
  LOG.debug("Aggregating column stats. Threads used: {}", numThreads);
  long start = System.currentTimeMillis();
  try {
    for (final Entry<ColumnStatsAggregator, List<ColStatsObjWithSourceInfo>> entry : colStatsMap.entrySet()) {
      futures.add(pool.submit(new Callable<ColumnStatisticsObj>() {
        @Override
        public ColumnStatisticsObj call() throws MetaException {
          List<ColStatsObjWithSourceInfo> colStatWithSourceInfo = entry.getValue();
          ColumnStatsAggregator aggregator = entry.getKey();
          try {
            return aggregator.aggregate(colStatWithSourceInfo, partNames, areAllPartsFound);
          } catch (MetaException e) {
            LOG.debug(e.getMessage());
            throw e;
          }
        }
      }));
    }
  } finally {
    // Stop accepting work even if a submission failed, so the worker threads can exit.
    pool.shutdown();
  }
  for (Future<ColumnStatisticsObj> future : futures) {
    try {
      ColumnStatisticsObj statsObj = future.get();
      if (statsObj != null) {
        aggrColStatObjs.add(statsObj);
      }
    } catch (InterruptedException e) {
      LOG.debug(e.getMessage());
      pool.shutdownNow();
      // Preserve the interrupt for callers further up the stack.
      Thread.currentThread().interrupt();
      throw new MetaException(e.toString());
    } catch (ExecutionException e) {
      LOG.debug(e.getMessage());
      // Cancel the remaining tasks; their results would be discarded anyway.
      pool.shutdownNow();
      throw new MetaException(e.toString());
    }
  }
  LOG.debug("Time for aggr col stats in seconds: {} Threads used: {}", ((System.currentTimeMillis() - (double) start)) / 1000, numThreads);
  return aggrColStatObjs;
}
Use of com.google.common.util.concurrent.ThreadFactoryBuilder in project Hive by Apache.
Class Hive, method copyFiles.
/**
 * Moves every source file into the destination directory via {@code mvFile}, either
 * sequentially or on a pool of daemon threads named {@code Move-Thread-N}.
 *
 * <p>The pool size comes from {@code HIVE_MOVE_FILES_THREAD_COUNT} (fallback 25); a value
 * of 0 or less disables the pool and files are moved on the calling thread. Source entries
 * that are directories are expanded one level using
 * {@code FileUtils.HIDDEN_FILES_PATH_FILTER}. Both the sources and each directory listing
 * are sorted before processing.
 *
 * @param conf configuration used to read the thread count and passed to {@code mvFile}
 * @param destFs destination file system
 * @param srcs source files and/or directories; sorted in place
 * @param srcFs source file system
 * @param destf destination path; must be a directory
 * @param isSrcLocal forwarded to {@code mvFile}; also disables rename when {@code true}
 * @param isOverwrite forwarded to {@code mvFile}
 * @param newFiles if non-null, receives the destination path of every moved file.
 *        NOTE(review): in pooled mode this list is appended to from worker threads, so
 *        callers presumably pass a thread-safe list - confirm.
 * @param acidRename when {@code true}, each file is given a sequential task id (starting
 *        at 0) instead of -1
 * @throws HiveException if the destination is not a directory, a listing fails, or any
 *         move fails
 */
private static void copyFiles(final HiveConf conf, final FileSystem destFs, FileStatus[] srcs, final FileSystem srcFs, final Path destf, final boolean isSrcLocal, boolean isOverwrite, final List<Path> newFiles, boolean acidRename) throws HiveException {
  final HdfsUtils.HadoopFileStatus fullDestStatus;
  try {
    fullDestStatus = new HdfsUtils.HadoopFileStatus(conf, destFs, destf);
  } catch (IOException e1) {
    throw new HiveException(e1);
  }
  if (!fullDestStatus.getFileStatus().isDirectory()) {
    throw new HiveException(destf + " is not a directory.");
  }
  final List<Future<ObjectPair<Path, Path>>> futures = new LinkedList<>();
  final int threadCount = conf.getInt(ConfVars.HIVE_MOVE_FILES_THREAD_COUNT.varname, 25);
  // A null pool means "run sequentially on the calling thread".
  final ExecutorService pool = threadCount > 0 ? Executors.newFixedThreadPool(threadCount, new ThreadFactoryBuilder().setDaemon(true).setNameFormat("Move-Thread-%d").build()) : null;
  // For ACID non-bucketed case, the filenames have to be in the format consistent with INSERT/UPDATE/DELETE Ops,
  // i.e, like 000000_0, 000001_0_copy_1, 000002_0.gz etc.
  // The extension is only maintained for files which are compressed.
  int taskId = 0;
  // Sort the files
  Arrays.sort(srcs);
  for (FileStatus src : srcs) {
    FileStatus[] files;
    if (src.isDirectory()) {
      try {
        files = srcFs.listStatus(src.getPath(), FileUtils.HIDDEN_FILES_PATH_FILTER);
      } catch (IOException e) {
        // The pool is absent in sequential mode; guard so the listing failure is reported
        // as a HiveException rather than masked by a NullPointerException.
        if (null != pool) {
          pool.shutdownNow();
        }
        throw new HiveException(e);
      }
    } else {
      files = new FileStatus[] { src };
    }
    final SessionState parentSession = SessionState.get();
    // Sort the files
    Arrays.sort(files);
    for (final FileStatus srcFile : files) {
      final Path srcP = srcFile.getPath();
      final boolean needToCopy = needToCopy(srcP, destf, srcFs, destFs);
      final boolean isRenameAllowed = !needToCopy && !isSrcLocal;
      final String msg = "Unable to move source " + srcP + " to destination " + destf;
      // copy from source to destination, we will inherit the destination's parent group ownership.
      if (null == pool) {
        try {
          Path destPath = mvFile(conf, srcFs, srcP, destFs, destf, isSrcLocal, isOverwrite, isRenameAllowed, acidRename ? taskId++ : -1);
          if (null != newFiles) {
            newFiles.add(destPath);
          }
        } catch (Exception e) {
          throw getHiveException(e, msg, "Failed to move: {}");
        }
      } else {
        // future only takes final or seemingly final values. Make a final copy of taskId
        final int finalTaskId = acidRename ? taskId++ : -1;
        futures.add(pool.submit(new Callable<ObjectPair<Path, Path>>() {
          @Override
          public ObjectPair<Path, Path> call() throws HiveException {
            // Make the submitting thread's session visible on the worker thread.
            SessionState.setCurrentSessionState(parentSession);
            try {
              Path destPath = mvFile(conf, srcFs, srcP, destFs, destf, isSrcLocal, isOverwrite, isRenameAllowed, finalTaskId);
              if (null != newFiles) {
                newFiles.add(destPath);
              }
              return ObjectPair.create(srcP, destPath);
            } catch (Exception e) {
              throw getHiveException(e, msg);
            }
          }
        }));
      }
    }
  }
  if (null != pool) {
    // No more submissions; wait for every move and surface the first failure.
    pool.shutdown();
    for (Future<ObjectPair<Path, Path>> future : futures) {
      try {
        ObjectPair<Path, Path> pair = future.get();
        LOG.debug("Moved src: {}, to dest: {}", pair.getFirst().toString(), pair.getSecond().toString());
      } catch (Exception e) {
        throw handlePoolException(pool, e);
      }
    }
  }
}
Use of com.google.common.util.concurrent.ThreadFactoryBuilder in project Hive by Apache.
Class Hive, method trashFiles.
/**
 * Trashes or deletes the given files by calling {@code FileUtils.moveToTrash} on each one,
 * either sequentially or on a pool of daemon threads named {@code Delete-Thread-N}.
 *
 * <p>The pool size comes from {@code HIVE_MOVE_FILES_THREAD_COUNT} (fallback 25); a value
 * of 0 or less disables the pool and the files are processed on the calling thread.
 *
 * @param fs FileSystem to use
 * @param statuses fileStatuses of files to be deleted
 * @param conf hive configuration
 * @param purge passed through unchanged to {@code FileUtils.moveToTrash}
 * @return {@code false} if {@code statuses} is null or empty; otherwise {@code true} only
 *         if every {@code moveToTrash} call returned {@code true}
 * @throws IOException if a sequential {@code moveToTrash} call throws, or if a pooled task
 *         fails or the wait is interrupted (the original exception is the cause)
 */
public static boolean trashFiles(final FileSystem fs, final FileStatus[] statuses, final Configuration conf, final boolean purge) throws IOException {
  boolean result = true;
  if (statuses == null || statuses.length == 0) {
    return false;
  }
  final int threadCount = conf.getInt(ConfVars.HIVE_MOVE_FILES_THREAD_COUNT.varname, 25);
  final List<Future<Boolean>> futures = new LinkedList<>();
  // A null pool means "run sequentially on the calling thread".
  final ExecutorService pool = threadCount > 0 ? Executors.newFixedThreadPool(threadCount, new ThreadFactoryBuilder().setDaemon(true).setNameFormat("Delete-Thread-%d").build()) : null;
  final SessionState parentSession = SessionState.get();
  for (final FileStatus status : statuses) {
    if (null == pool) {
      result &= FileUtils.moveToTrash(fs, status.getPath(), conf, purge);
    } else {
      futures.add(pool.submit(new Callable<Boolean>() {
        @Override
        public Boolean call() throws Exception {
          // Make the submitting thread's session visible on the worker thread.
          SessionState.setCurrentSessionState(parentSession);
          return FileUtils.moveToTrash(fs, status.getPath(), conf, purge);
        }
      }));
    }
  }
  if (null != pool) {
    pool.shutdown();
    for (Future<Boolean> future : futures) {
      try {
        result &= future.get();
      } catch (InterruptedException | ExecutionException e) {
        LOG.error("Failed to delete: ", e);
        pool.shutdownNow();
        if (e instanceof InterruptedException) {
          // Preserve the interrupt for callers further up the stack.
          Thread.currentThread().interrupt();
        }
        throw new IOException(e);
      }
    }
  }
  return result;
}
Use of com.google.common.util.concurrent.ThreadFactoryBuilder in project Hive by Apache.
Class BasicStatsTask, method buildBasicStatsExecutor.
/**
 * Creates the fixed-size executor used to fetch file stats for partitions.
 *
 * <p>The size is read from {@code HIVE_MOVE_FILES_THREAD_COUNT} (fallback 1) and is never
 * less than 1. Worker threads are daemons named {@code stats-updater-thread-N}.
 *
 * @return a new executor; this method does not shut it down
 */
private ExecutorService buildBasicStatsExecutor() {
  // Get the file status up-front for all partitions. Beneficial in cases of blob storage systems
  final int configuredThreads = conf.getInt(ConfVars.HIVE_MOVE_FILES_THREAD_COUNT.varname, 1);
  // A configured count of 0 (or less) still gets a single worker thread.
  final int threadCount = configuredThreads < 1 ? 1 : configuredThreads;
  final ExecutorService executor = Executors.newFixedThreadPool(
      threadCount,
      new ThreadFactoryBuilder()
          .setDaemon(true)
          .setNameFormat("stats-updater-thread-%d")
          .build());
  LOG.debug("Getting file stats of all partitions. threadpool size:" + threadCount);
  return executor;
}
Use of com.google.common.util.concurrent.ThreadFactoryBuilder in project Hive by Apache.
Class StatsUtils, method getFileSizeForPartitions.
/**
 * Find the bytes on disks occupied by list of partitions.
 *
 * <p>Each partition's data location is sized with {@code FileSystem.getContentSummary} on a
 * pool of daemon threads named {@code Get-Partitions-Size-N}. The pool size is
 * {@code METASTORE_FS_HANDLER_THREADS_COUNT}, but at least 1. A partition whose lookup
 * fails with an {@link IOException} contributes a size of 0.
 *
 * @param conf
 *          - hive conf
 * @param parts
 *          - partition list
 * @return sizes of partitions, in the order of {@code parts}; if waiting for a result is
 *         interrupted or a task fails with a non-IO exception, the list holds only the
 *         sizes collected up to that point and is shorter than {@code parts}
 */
public static List<Long> getFileSizeForPartitions(final HiveConf conf, List<Partition> parts) {
  LOG.info("Number of partitions : " + parts.size());
  List<Future<Long>> futures = new ArrayList<>(parts.size());
  int threads = Math.max(1, conf.getIntVar(ConfVars.METASTORE_FS_HANDLER_THREADS_COUNT));
  final ExecutorService pool = Executors.newFixedThreadPool(threads, new ThreadFactoryBuilder().setDaemon(true).setNameFormat("Get-Partitions-Size-%d").build());
  final ArrayList<Long> sizes = new ArrayList<>(parts.size());
  try {
    // Submission happens inside the try so the pool is shut down even if it throws.
    for (final Partition part : parts) {
      final Path path = part.getDataLocation();
      futures.add(pool.submit(new Callable<Long>() {
        @Override
        public Long call() throws Exception {
          try {
            LOG.debug("Partition path : " + path);
            FileSystem fs = path.getFileSystem(conf);
            return fs.getContentSummary(path).getLength();
          } catch (IOException e) {
            // Best effort: an unreadable partition counts as empty, but leave a trace.
            LOG.debug("Unable to get size of partition path : " + path, e);
            return 0L;
          }
        }
      }));
    }
    // Collect in submission order so sizes line up with parts.
    for (Future<Long> future : futures) {
      sizes.add(future.get());
    }
  } catch (InterruptedException e) {
    // Preserve the interrupt for callers further up the stack.
    Thread.currentThread().interrupt();
    LOG.warn("Exception in processing files ", e);
  } catch (ExecutionException e) {
    LOG.warn("Exception in processing files ", e);
  } finally {
    pool.shutdownNow();
  }
  return sizes;
}
Aggregations