Use of java.util.concurrent.Callable in project hive by apache.
The class Hive, method copyFiles.
private static void copyFiles(final HiveConf conf, final FileSystem destFs, FileStatus[] srcs, final FileSystem srcFs, final Path destf, final boolean isSrcLocal, final List<Path> newFiles) throws HiveException {
final HdfsUtils.HadoopFileStatus fullDestStatus;
try {
fullDestStatus = new HdfsUtils.HadoopFileStatus(conf, destFs, destf);
} catch (IOException e1) {
throw new HiveException(e1);
}
if (!fullDestStatus.getFileStatus().isDirectory()) {
throw new HiveException(destf + " is not a directory.");
}
final boolean inheritPerms = HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_WAREHOUSE_SUBDIR_INHERIT_PERMS);
final List<Future<ObjectPair<Path, Path>>> futures = new LinkedList<>();
final int moveThreads = conf.getInt(ConfVars.HIVE_MOVE_FILES_THREAD_COUNT.varname, 25);
final ExecutorService pool = moveThreads > 0 ? Executors.newFixedThreadPool(moveThreads, new ThreadFactoryBuilder().setDaemon(true).setNameFormat("Move-Thread-%d").build()) : null;
for (FileStatus src : srcs) {
FileStatus[] files;
if (src.isDirectory()) {
try {
files = srcFs.listStatus(src.getPath(), FileUtils.HIDDEN_FILES_PATH_FILTER);
} catch (IOException e) {
if (pool != null) {
pool.shutdownNow();
}
throw new HiveException(e);
}
} else {
files = new FileStatus[] { src };
}
final SessionState parentSession = SessionState.get();
for (final FileStatus srcFile : files) {
final Path srcP = srcFile.getPath();
final boolean needToCopy = needToCopy(srcP, destf, srcFs, destFs);
final boolean isRenameAllowed = !needToCopy && !isSrcLocal;
// If we do a rename for a non-local file, we will be transferring the original
// file permissions from source to destination. Else, in the mvFile() case where we
// copy from source to destination, we will inherit the destination's parent group ownership.
final String srcGroup = isRenameAllowed ? srcFile.getGroup() : fullDestStatus.getFileStatus().getGroup();
if (null == pool) {
try {
Path destPath = mvFile(conf, srcFs, srcP, destFs, destf, isSrcLocal, isRenameAllowed);
if (null != newFiles) {
newFiles.add(destPath);
}
} catch (IOException ioe) {
LOG.error("Failed to move: {}", ioe.getMessage());
throw new HiveException(ioe);
}
} else {
futures.add(pool.submit(new Callable<ObjectPair<Path, Path>>() {
@Override
public ObjectPair<Path, Path> call() throws Exception {
SessionState.setCurrentSessionState(parentSession);
Path destPath = mvFile(conf, srcFs, srcP, destFs, destf, isSrcLocal, isRenameAllowed);
if (inheritPerms) {
HdfsUtils.setFullFileStatus(conf, fullDestStatus, srcGroup, destFs, destPath, false);
}
if (null != newFiles) {
newFiles.add(destPath);
}
return ObjectPair.create(srcP, destPath);
}
}));
}
}
}
if (null == pool) {
if (inheritPerms) {
HdfsUtils.setFullFileStatus(conf, fullDestStatus, null, destFs, destf, true);
}
} else {
pool.shutdown();
for (Future<ObjectPair<Path, Path>> future : futures) {
try {
ObjectPair<Path, Path> pair = future.get();
LOG.debug("Moved src: {}", pair.getFirst().toString(), ", to dest: {}", pair.getSecond().toString());
} catch (Exception e) {
LOG.error("Failed to move: {}", e.getMessage());
pool.shutdownNow();
throw new HiveException(e.getCause());
}
}
}
}
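Distilled from the method above, the core Callable idiom is: submit one task per unit of work to a pool of daemon threads, collect the Futures, then drain them and rethrow the first failure. Below is a minimal, self-contained sketch of that submit-then-drain pattern; the class name and the per-task work are illustrative stand-ins, not Hive APIs.

import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;

public class SubmitThenDrain {
  public static void main(String[] args) throws Exception {
    ExecutorService pool = Executors.newFixedThreadPool(4, r -> {
      Thread t = new Thread(r, "move-thread");
      // daemon threads, like the Move-Thread pool above, so a failed
      // query cannot pin the JVM open
      t.setDaemon(true);
      return t;
    });
    List<Future<String>> futures = new ArrayList<>();
    for (int i = 0; i < 8; i++) {
      final int id = i;
      futures.add(pool.submit(new Callable<String>() {
        @Override
        public String call() {
          return "moved-" + id; // stand-in for the per-file move
        }
      }));
    }
    pool.shutdown(); // accept no new tasks; submitted ones still run
    try {
      for (Future<String> f : futures) {
        System.out.println(f.get()); // rethrows a task failure as ExecutionException
      }
    } catch (ExecutionException e) {
      pool.shutdownNow(); // cancel the rest on first failure, as copyFiles does
      throw new RuntimeException(e.getCause());
    }
  }
}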
Use of java.util.concurrent.Callable in project hive by apache.
The class StatsTask, method aggregateStats.
private int aggregateStats(Hive db) {
StatsAggregator statsAggregator = null;
int ret = 0;
StatsCollectionContext scc = null;
EnvironmentContext environmentContext = null;
try {
// Stats setup:
final Warehouse wh = new Warehouse(conf);
if (!getWork().getNoStatsAggregator() && !getWork().isNoScanAnalyzeCommand()) {
try {
scc = getContext();
statsAggregator = createStatsAggregator(scc, conf);
} catch (HiveException e) {
if (HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_STATS_RELIABLE)) {
throw e;
}
console.printError(ErrorMsg.STATS_SKIPPING_BY_ERROR.getErrorCodedMsg(e.toString()));
}
}
List<Partition> partitions = getPartitionsList(db);
boolean atomic = HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_STATS_ATOMIC);
String tableFullName = table.getDbName() + "." + table.getTableName();
if (partitions == null) {
org.apache.hadoop.hive.metastore.api.Table tTable = table.getTTable();
Map<String, String> parameters = tTable.getParameters();
// An ACID table will not have accurate stats unless they are set through the analyze command.
if (work.getTableSpecs() == null && AcidUtils.isAcidTable(table)) {
StatsSetupConst.setBasicStatsState(parameters, StatsSetupConst.FALSE);
} else if (work.getTableSpecs() != null || (work.getLoadTableDesc() != null && work.getLoadTableDesc().getReplace()) || (work.getLoadFileDesc() != null && !work.getLoadFileDesc().getDestinationCreateTable().isEmpty())) {
StatsSetupConst.setBasicStatsState(parameters, StatsSetupConst.TRUE);
}
// non-partitioned tables:
if (!existStats(parameters) && atomic) {
return 0;
}
// For example, if a file is being loaded, the old number of rows is not valid
if (work.isClearAggregatorStats()) {
// we choose to keep the invalid stats and only change the setting.
StatsSetupConst.setBasicStatsState(parameters, StatsSetupConst.FALSE);
}
updateQuickStats(wh, parameters, tTable.getSd());
if (StatsSetupConst.areBasicStatsUptoDate(parameters)) {
if (statsAggregator != null) {
String prefix = getAggregationPrefix(table, null);
updateStats(statsAggregator, parameters, prefix, atomic);
}
// write table stats to metastore
if (!getWork().getNoStatsAggregator()) {
environmentContext = new EnvironmentContext();
environmentContext.putToProperties(StatsSetupConst.STATS_GENERATED, StatsSetupConst.TASK);
}
}
getHive().alterTable(tableFullName, new Table(tTable), environmentContext);
if (conf.getBoolVar(ConfVars.TEZ_EXEC_SUMMARY)) {
console.printInfo("Table " + tableFullName + " stats: [" + toString(parameters) + ']');
}
LOG.info("Table " + tableFullName + " stats: [" + toString(parameters) + ']');
} else {
// Partitioned table:
// Need to get the old stats of the partition
// and update the table stats based on the old and new stats.
List<Partition> updates = new ArrayList<Partition>();
//Get the file statuses up-front for all partitions. This is beneficial on blob storage systems
final Map<String, FileStatus[]> fileStatusMap = new ConcurrentHashMap<String, FileStatus[]>();
int poolSize = conf.getInt(ConfVars.HIVE_MOVE_FILES_THREAD_COUNT.varname, 1);
// In case the thread count is set to 0, use a single thread.
poolSize = Math.max(poolSize, 1);
final ExecutorService pool = Executors.newFixedThreadPool(poolSize, new ThreadFactoryBuilder().setDaemon(true).setNameFormat("stats-updater-thread-%d").build());
final List<Future<Void>> futures = Lists.newLinkedList();
LOG.debug("Getting file stats of all partitions. threadpool size:" + poolSize);
try {
for (final Partition partn : partitions) {
final String partitionName = partn.getName();
final org.apache.hadoop.hive.metastore.api.Partition tPart = partn.getTPartition();
Map<String, String> parameters = tPart.getParameters();
if (!existStats(parameters) && atomic) {
continue;
}
futures.add(pool.submit(new Callable<Void>() {
@Override
public Void call() throws Exception {
FileStatus[] partfileStatus = wh.getFileStatusesForSD(tPart.getSd());
fileStatusMap.put(partitionName, partfileStatus);
return null;
}
}));
}
pool.shutdown();
for (Future<Void> future : futures) {
future.get();
}
} catch (InterruptedException e) {
LOG.debug("Cancelling " + futures.size() + " file stats lookup tasks");
//cancel other futures
for (Future<Void> future : futures) {
future.cancel(true);
}
// Fail the query if the stats are supposed to be reliable
if (work.isStatsReliable()) {
ret = 1;
}
} finally {
if (pool != null) {
pool.shutdownNow();
}
LOG.debug("Finished getting file stats of all partitions");
}
for (Partition partn : partitions) {
//
// get the old partition stats
//
org.apache.hadoop.hive.metastore.api.Partition tPart = partn.getTPartition();
Map<String, String> parameters = tPart.getParameters();
if (work.getTableSpecs() == null && AcidUtils.isAcidTable(table)) {
StatsSetupConst.setBasicStatsState(parameters, StatsSetupConst.FALSE);
} else if (work.getTableSpecs() != null || (work.getLoadTableDesc() != null && work.getLoadTableDesc().getReplace()) || (work.getLoadFileDesc() != null && !work.getLoadFileDesc().getDestinationCreateTable().isEmpty())) {
StatsSetupConst.setBasicStatsState(parameters, StatsSetupConst.TRUE);
}
//a partition is added to fileStatusMap only when its stats exist
if (!fileStatusMap.containsKey(partn.getName())) {
continue;
}
// For example, if a file is being loaded, the old number of rows is not valid
if (work.isClearAggregatorStats()) {
// we choose to keep the invalid stats and only change the setting.
StatsSetupConst.setBasicStatsState(parameters, StatsSetupConst.FALSE);
}
updateQuickStats(parameters, fileStatusMap.get(partn.getName()));
if (StatsSetupConst.areBasicStatsUptoDate(parameters)) {
if (statsAggregator != null) {
String prefix = getAggregationPrefix(table, partn);
updateStats(statsAggregator, parameters, prefix, atomic);
}
if (!getWork().getNoStatsAggregator()) {
environmentContext = new EnvironmentContext();
environmentContext.putToProperties(StatsSetupConst.STATS_GENERATED, StatsSetupConst.TASK);
}
}
updates.add(new Partition(table, tPart));
if (conf.getBoolVar(ConfVars.TEZ_EXEC_SUMMARY)) {
console.printInfo("Partition " + tableFullName + partn.getSpec() + " stats: [" + toString(parameters) + ']');
}
LOG.info("Partition " + tableFullName + partn.getSpec() + " stats: [" + toString(parameters) + ']');
}
if (!updates.isEmpty()) {
db.alterPartitions(tableFullName, updates, environmentContext);
}
}
} catch (Exception e) {
console.printInfo("[Warning] could not update stats.", "Failed with exception " + e.getMessage() + "\n" + StringUtils.stringifyException(e));
// Fail the query if the stats are supposed to be reliable
if (work.isStatsReliable()) {
ret = 1;
}
} finally {
if (statsAggregator != null) {
statsAggregator.closeConnection(scc);
}
}
// 0 indicates success; anything else indicates failure
return ret;
}
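The partitioned branch above fans the per-partition file-status lookups out as Callable<Void> tasks that write into a ConcurrentHashMap keyed by partition name, then joins on the Futures and cancels the remainder on interruption. A minimal sketch of that fan-out/fan-in idiom follows; the lookup body is a trivial stand-in for wh.getFileStatusesForSD(...), and none of the names below are Hive APIs.

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Map;
import java.util.concurrent.Callable;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;

public class PartitionStatsFanOut {
  public static void main(String[] args) throws Exception {
    List<String> partitions = Arrays.asList("ds=2017-01-01", "ds=2017-01-02");
    final Map<String, Long> sizeByPartition = new ConcurrentHashMap<>();
    ExecutorService pool = Executors.newFixedThreadPool(Math.max(1, partitions.size()));
    List<Future<Void>> futures = new ArrayList<>();
    for (final String name : partitions) {
      futures.add(pool.submit(new Callable<Void>() {
        @Override
        public Void call() {
          // stand-in for the slow per-partition metadata lookup
          sizeByPartition.put(name, (long) name.length());
          return null;
        }
      }));
    }
    pool.shutdown();
    try {
      for (Future<Void> f : futures) {
        f.get(); // propagate lookup failures
      }
    } catch (InterruptedException e) {
      for (Future<Void> f : futures) {
        f.cancel(true); // cancel outstanding lookups, as StatsTask does
      }
      Thread.currentThread().interrupt();
    } finally {
      pool.shutdownNow();
    }
    System.out.println(sizeByPartition);
  }
}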
Use of java.util.concurrent.Callable in project hive by apache.
The class DruidStorageHandler, method commitCreateTable.
@Override
public void commitCreateTable(Table table) throws MetaException {
if (MetaStoreUtils.isExternalTable(table)) {
return;
}
Lifecycle lifecycle = new Lifecycle();
LOG.info(String.format("Committing table [%s] to the druid metastore", table.getDbName()));
final Path tableDir = getSegmentDescriptorDir();
try {
List<DataSegment> segmentList = DruidStorageHandlerUtils.getPublishedSegments(tableDir, getConf());
LOG.info(String.format("Found [%d] segments under path [%s]", segmentList.size(), tableDir));
druidSqlMetadataStorageUpdaterJobHandler.publishSegments(druidMetadataStorageTablesConfig.getSegmentsTable(), segmentList, DruidStorageHandlerUtils.JSON_MAPPER);
final String coordinatorAddress = HiveConf.getVar(getConf(), HiveConf.ConfVars.HIVE_DRUID_COORDINATOR_DEFAULT_ADDRESS);
int maxTries = HiveConf.getIntVar(getConf(), HiveConf.ConfVars.HIVE_DRUID_MAX_TRIES);
final String dataSourceName = table.getParameters().get(Constants.DRUID_DATA_SOURCE);
LOG.info(String.format("checking load status from coordinator [%s]", coordinatorAddress));
// check if the coordinator is up
httpClient = makeHttpClient(lifecycle);
try {
lifecycle.start();
} catch (Exception e) {
Throwables.propagate(e);
}
String coordinatorResponse = null;
try {
coordinatorResponse = RetryUtils.retry(new Callable<String>() {
@Override
public String call() throws Exception {
return DruidStorageHandlerUtils.getURL(httpClient, new URL(String.format("http://%s/status", coordinatorAddress)));
}
}, new Predicate<Throwable>() {
@Override
public boolean apply(@Nullable Throwable input) {
return input instanceof IOException;
}
}, maxTries);
} catch (Exception e) {
console.printInfo("Will skip waiting for data loading");
return;
}
if (Strings.isNullOrEmpty(coordinatorResponse)) {
console.printInfo("Will skip waiting for data loading");
return;
}
console.printInfo(String.format("Waiting for the loading of [%s] segments", segmentList.size()));
long passiveWaitTimeMs = HiveConf.getLongVar(getConf(), HiveConf.ConfVars.HIVE_DRUID_PASSIVE_WAIT_TIME);
ImmutableSet<URL> setOfUrls = FluentIterable.from(segmentList).transform(new Function<DataSegment, URL>() {
@Override
public URL apply(DataSegment dataSegment) {
try {
//Need to make sure that we are using UTC since most Druid clusters use UTC by default
return new URL(String.format("http://%s/druid/coordinator/v1/datasources/%s/segments/%s", coordinatorAddress, dataSourceName, DataSegment.makeDataSegmentIdentifier(dataSegment.getDataSource(), new DateTime(dataSegment.getInterval().getStartMillis(), DateTimeZone.UTC), new DateTime(dataSegment.getInterval().getEndMillis(), DateTimeZone.UTC), dataSegment.getVersion(), dataSegment.getShardSpec())));
} catch (MalformedURLException e) {
Throwables.propagate(e);
}
return null;
}
}).toSet();
int numRetries = 0;
while (numRetries++ < maxTries && !setOfUrls.isEmpty()) {
setOfUrls = ImmutableSet.copyOf(Sets.filter(setOfUrls, new Predicate<URL>() {
@Override
public boolean apply(URL input) {
try {
String result = DruidStorageHandlerUtils.getURL(httpClient, input);
LOG.debug(String.format("Checking segment [%s] response is [%s]", input, result));
return Strings.isNullOrEmpty(result);
} catch (IOException e) {
LOG.error(String.format("Error while checking URL [%s]", input), e);
return true;
}
}
}));
try {
if (!setOfUrls.isEmpty()) {
Thread.sleep(passiveWaitTimeMs);
}
} catch (InterruptedException e) {
Thread.currentThread().interrupt();
Throwables.propagate(e);
}
}
if (!setOfUrls.isEmpty()) {
// We are not throwing an exception since it might be a transient issue that is blocking loading
console.printError(String.format("Wait time exhausted and we have [%s] out of [%s] segments not loaded yet", setOfUrls.size(), segmentList.size()));
}
} catch (IOException e) {
LOG.error("Exception while commit", e);
Throwables.propagate(e);
} finally {
cleanWorkingDir();
lifecycle.stop();
}
}
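Here the Callable is not submitted to a pool at all: RetryUtils.retry treats it as a retryable unit of work, and a Predicate<Throwable> decides which failures are transient. Below is a standalone sketch of that retry-on-predicate idiom; the retry helper is an illustrative stand-in, not Druid's actual RetryUtils implementation.

import java.io.IOException;
import java.util.concurrent.Callable;
import java.util.function.Predicate;

public class RetryOnPredicate {
  // Illustrative stand-in for RetryUtils.retry: run the Callable, retrying
  // only when the Predicate says the failure is transient.
  static <T> T retry(Callable<T> task, Predicate<Throwable> retryable, int maxTries) throws Exception {
    Exception last = null;
    for (int attempt = 1; attempt <= maxTries; attempt++) {
      try {
        return task.call();
      } catch (Exception e) {
        last = e;
        if (!retryable.test(e)) {
          throw e; // non-retryable: fail fast
        }
      }
    }
    throw last; // retries exhausted (assumes maxTries >= 1)
  }

  public static void main(String[] args) throws Exception {
    final int[] calls = { 0 };
    String status = retry(new Callable<String>() {
      @Override
      public String call() throws Exception {
        // stand-in for the coordinator /status probe
        if (++calls[0] < 3) {
          throw new IOException("coordinator not reachable yet");
        }
        return "ok";
      }
    }, t -> t instanceof IOException, 5);
    System.out.println(status + " after " + calls[0] + " tries");
  }
}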
Use of java.util.concurrent.Callable in project hive by apache.
The class TestLowLevelCacheImpl, method testMTTWithCleanup.
@Test
public void testMTTWithCleanup() {
final LowLevelCacheImpl cache = new LowLevelCacheImpl(LlapDaemonCacheMetrics.create("test", "1"), new DummyCachePolicy(), new DummyAllocator(), true, 1);
final long fn1 = 1, fn2 = 2;
final int offsetsToUse = 8;
final CountDownLatch cdlIn = new CountDownLatch(4), cdlOut = new CountDownLatch(1);
final AtomicInteger rdmsDone = new AtomicInteger(0);
Callable<Long> rdmCall = new Callable<Long>() {
public Long call() {
int gets = 0, puts = 0;
try {
Random rdm = new Random(1234 + Thread.currentThread().getId());
syncThreadStart(cdlIn, cdlOut);
for (int i = 0; i < 20000; ++i) {
boolean isGet = rdm.nextBoolean(), isFn1 = rdm.nextBoolean();
long fileName = isFn1 ? fn1 : fn2;
int fileIndex = isFn1 ? 1 : 2;
int count = rdm.nextInt(offsetsToUse);
if (isGet) {
int[] offsets = new int[count];
count = generateOffsets(offsetsToUse, rdm, offsets);
CreateHelper list = new CreateHelper();
for (int j = 0; j < count; ++j) {
list.addOrMerge(offsets[j], offsets[j] + 1, true, false);
}
DiskRangeList iter = cache.getFileData(fileName, list.get(), 0, testFactory, null, null);
int j = -1;
while (iter != null) {
++j;
if (!(iter instanceof CacheChunk)) {
iter = iter.next;
continue;
}
++gets;
LlapDataBuffer result = (LlapDataBuffer) ((CacheChunk) iter).getBuffer();
assertEquals(makeFakeArenaIndex(fileIndex, offsets[j]), result.arenaIndex);
cache.decRefBuffer(result);
iter = iter.next;
}
} else {
DiskRange[] ranges = new DiskRange[count];
int[] offsets = new int[count];
for (int j = 0; j < count; ++j) {
int next = rdm.nextInt(offsetsToUse);
ranges[j] = dr(next, next + 1);
offsets[j] = next;
}
MemoryBuffer[] buffers = new MemoryBuffer[count];
for (int j = 0; j < offsets.length; ++j) {
LlapDataBuffer buf = LowLevelCacheImpl.allocateFake();
buf.arenaIndex = makeFakeArenaIndex(fileIndex, offsets[j]);
buffers[j] = buf;
}
long[] mask = cache.putFileData(fileName, ranges, buffers, 0, Priority.NORMAL, null);
puts += buffers.length;
long maskVal = 0;
if (mask != null) {
assertEquals(1, mask.length);
maskVal = mask[0];
}
for (int j = 0; j < offsets.length; ++j) {
LlapDataBuffer buf = (LlapDataBuffer) (buffers[j]);
if ((maskVal & 1) == 1) {
assertEquals(makeFakeArenaIndex(fileIndex, offsets[j]), buf.arenaIndex);
}
maskVal >>= 1;
cache.decRefBuffer(buf);
}
}
}
} finally {
rdmsDone.incrementAndGet();
}
return (((long) gets) << 32) | puts;
}
private int makeFakeArenaIndex(int fileIndex, long offset) {
return (int) ((fileIndex << 16) + offset);
}
};
FutureTask<Integer> evictionTask = new FutureTask<Integer>(new Callable<Integer>() {
public Integer call() {
boolean isFirstFile = false;
Random rdm = new Random(1234 + Thread.currentThread().getId());
int evictions = 0;
syncThreadStart(cdlIn, cdlOut);
while (rdmsDone.get() < 3) {
DiskRangeList head = new DiskRangeList(0, offsetsToUse + 1);
isFirstFile = !isFirstFile;
long fileId = isFirstFile ? fn1 : fn2;
head = cache.getFileData(fileId, head, 0, testFactory, null, null);
DiskRange[] results = head.listToArray();
int startIndex = rdm.nextInt(results.length), index = startIndex;
LlapDataBuffer victim = null;
do {
DiskRange r = results[index];
if (r instanceof CacheChunk) {
LlapDataBuffer result = (LlapDataBuffer) ((CacheChunk) r).getBuffer();
cache.decRefBuffer(result);
if (victim == null && result.invalidate()) {
++evictions;
victim = result;
}
}
++index;
if (index == results.length)
index = 0;
} while (index != startIndex);
if (victim == null)
continue;
cache.notifyEvicted(victim);
}
return evictions;
}
});
FutureTask<Long> rdmTask1 = new FutureTask<Long>(rdmCall), rdmTask2 = new FutureTask<Long>(rdmCall), rdmTask3 = new FutureTask<Long>(rdmCall);
ExecutorService threadPool = Executors.newFixedThreadPool(4);
threadPool.execute(rdmTask1);
threadPool.execute(rdmTask2);
threadPool.execute(rdmTask3);
threadPool.execute(evictionTask);
try {
cdlIn.await();
cdlOut.countDown();
long result1 = rdmTask1.get(), result2 = rdmTask2.get(), result3 = rdmTask3.get();
int evictions = evictionTask.get();
LOG.info("MTT test: task 1: " + descRdmTask(result1) + ", task 2: " + descRdmTask(result2) + ", task 3: " + descRdmTask(result3) + "; " + evictions + " evictions");
} catch (Throwable t) {
throw new RuntimeException(t);
} finally {
threadPool.shutdownNow();
}
}
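The test pairs Callable with FutureTask, which is both a Runnable that can be handed to a plain Executor and a Future that can be polled for the result, and it gates the workers on a pair of CountDownLatches (cdlIn/cdlOut above) so they all start at the same moment. A minimal sketch of that FutureTask-plus-latch pattern follows, with the worker body reduced to returning the thread id; all names are illustrative.

import java.util.concurrent.Callable;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.FutureTask;

public class FutureTaskGate {
  public static void main(String[] args) throws Exception {
    final CountDownLatch ready = new CountDownLatch(2); // workers report in
    final CountDownLatch go = new CountDownLatch(1); // main releases them together
    Callable<Long> work = new Callable<Long>() {
      @Override
      public Long call() throws Exception {
        ready.countDown();
        go.await(); // all workers cross the start line at once
        return Thread.currentThread().getId();
      }
    };
    FutureTask<Long> t1 = new FutureTask<>(work), t2 = new FutureTask<>(work);
    ExecutorService pool = Executors.newFixedThreadPool(2);
    pool.execute(t1); // FutureTask runs as a plain Runnable
    pool.execute(t2);
    ready.await();
    go.countDown();
    System.out.println(t1.get() + " / " + t2.get()); // and doubles as a Future
    pool.shutdown();
  }
}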
Use of java.util.concurrent.Callable in project hive by apache.
The class TestHiveClientCache, method testMultipleThreadAccess.
/**
* Check that a *new* client is created when requested from different threads,
* even with the same hive configuration
* @throws ExecutionException
* @throws InterruptedException
*/
@Test
public void testMultipleThreadAccess() throws ExecutionException, InterruptedException {
final HiveClientCache cache = new HiveClientCache(1000);
class GetHiveClient implements Callable<IMetaStoreClient> {
@Override
public IMetaStoreClient call() throws IOException, MetaException, LoginException {
return cache.get(hiveConf);
}
}
ExecutorService executor = Executors.newFixedThreadPool(2);
Callable<IMetaStoreClient> worker1 = new GetHiveClient();
Callable<IMetaStoreClient> worker2 = new GetHiveClient();
Future<IMetaStoreClient> clientFuture1 = executor.submit(worker1);
Future<IMetaStoreClient> clientFuture2 = executor.submit(worker2);
IMetaStoreClient client1 = clientFuture1.get();
IMetaStoreClient client2 = clientFuture2.get();
assertNotNull(client1);
assertNotNull(client2);
assertNotSame(client1, client2);
}
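The same check in miniature: a named Callable class is submitted from two pool threads, and each thread must be handed its own instance. The ThreadLocalCache below is an illustrative stand-in for HiveClientCache, which keys clients by thread; it is not the HCatalog API.

import java.util.concurrent.Callable;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;

public class PerThreadCacheCheck {
  // Illustrative stand-in: a cache that hands out one instance per calling thread.
  static class ThreadLocalCache {
    private final ThreadLocal<Object> clients = ThreadLocal.withInitial(Object::new);
    Object get() {
      return clients.get();
    }
  }

  public static void main(String[] args) throws Exception {
    final ThreadLocalCache cache = new ThreadLocalCache();
    class GetClient implements Callable<Object> {
      @Override
      public Object call() {
        return cache.get();
      }
    }
    ExecutorService executor = Executors.newFixedThreadPool(2);
    Future<Object> f1 = executor.submit(new GetClient());
    Future<Object> f2 = executor.submit(new GetClient());
    // Different threads must see different instances.
    System.out.println(f1.get() != f2.get()); // expected: true
    executor.shutdown();
  }
}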