Use of org.apache.hadoop.hive.ql.metadata.HiveException in project hive by apache.
The class StatsTask, method getContext.
private StatsCollectionContext getContext() throws HiveException {
  StatsCollectionContext scc = new StatsCollectionContext(conf);
  Task sourceTask = getWork().getSourceTask();
  if (sourceTask == null) {
    throw new HiveException(ErrorMsg.STATSAGGREGATOR_SOURCETASK_NULL.getErrorCodedMsg());
  }
  scc.setTask(sourceTask);
  scc.setStatsTmpDir(this.getWork().getStatsTmpDir());
  return scc;
}
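For comparison, the same context can be assembled directly when the source task is already in hand. This is a minimal sketch, not Hive code: it reuses only the constructor and setters visible above, and buildContext, sourceTask, and tmpDir are hypothetical names.

// Hypothetical helper mirroring getContext() for a caller that already holds the task.
StatsCollectionContext buildContext(HiveConf conf, Task sourceTask, String tmpDir) throws HiveException {
  if (sourceTask == null) {
    // Same guard as getContext(): stats aggregation needs a source task.
    throw new HiveException("source task is required for stats collection");
  }
  StatsCollectionContext scc = new StatsCollectionContext(conf);
  scc.setTask(sourceTask);
  scc.setStatsTmpDir(tmpDir);
  return scc;
}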
Use of org.apache.hadoop.hive.ql.metadata.HiveException in project hive by apache.
The class StatsTask, method aggregateStats.
private int aggregateStats(Hive db) {
  StatsAggregator statsAggregator = null;
  int ret = 0;
  StatsCollectionContext scc = null;
  EnvironmentContext environmentContext = null;
  try {
    // Stats setup:
    final Warehouse wh = new Warehouse(conf);
    if (!getWork().getNoStatsAggregator() && !getWork().isNoScanAnalyzeCommand()) {
      try {
        scc = getContext();
        statsAggregator = createStatsAggregator(scc, conf);
      } catch (HiveException e) {
        if (HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_STATS_RELIABLE)) {
          throw e;
        }
        console.printError(ErrorMsg.STATS_SKIPPING_BY_ERROR.getErrorCodedMsg(e.toString()));
      }
    }
    List<Partition> partitions = getPartitionsList(db);
    boolean atomic = HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_STATS_ATOMIC);
    String tableFullName = table.getDbName() + "." + table.getTableName();
    if (partitions == null) {
      org.apache.hadoop.hive.metastore.api.Table tTable = table.getTTable();
      Map<String, String> parameters = tTable.getParameters();
      // An ACID table will not have accurate stats unless they are set through an analyze command.
      if (work.getTableSpecs() == null && AcidUtils.isAcidTable(table)) {
        StatsSetupConst.setBasicStatsState(parameters, StatsSetupConst.FALSE);
      } else if (work.getTableSpecs() != null
          || (work.getLoadTableDesc() != null && work.getLoadTableDesc().getReplace())
          || (work.getLoadFileDesc() != null && !work.getLoadFileDesc().getDestinationCreateTable().isEmpty())) {
        StatsSetupConst.setBasicStatsState(parameters, StatsSetupConst.TRUE);
      }
      // Non-partitioned tables:
      if (!existStats(parameters) && atomic) {
        return 0;
      }
      // E.g. if a file is being loaded, the old row count is no longer valid
      if (work.isClearAggregatorStats()) {
        // We choose to keep the invalid stats and only change the state setting.
        StatsSetupConst.setBasicStatsState(parameters, StatsSetupConst.FALSE);
      }
      updateQuickStats(wh, parameters, tTable.getSd());
      if (StatsSetupConst.areBasicStatsUptoDate(parameters)) {
        if (statsAggregator != null) {
          String prefix = getAggregationPrefix(table, null);
          updateStats(statsAggregator, parameters, prefix, atomic);
        }
        // Write table stats to the metastore
        if (!getWork().getNoStatsAggregator()) {
          environmentContext = new EnvironmentContext();
          environmentContext.putToProperties(StatsSetupConst.STATS_GENERATED, StatsSetupConst.TASK);
        }
      }
      getHive().alterTable(tableFullName, new Table(tTable), environmentContext);
      if (conf.getBoolVar(ConfVars.TEZ_EXEC_SUMMARY)) {
        console.printInfo("Table " + tableFullName + " stats: [" + toString(parameters) + ']');
      }
      LOG.info("Table " + tableFullName + " stats: [" + toString(parameters) + ']');
    } else {
      // Partitioned table:
      // Need to get the old stats of the partition
      // and update the table stats based on the old and new stats.
      List<Partition> updates = new ArrayList<Partition>();
      // Get the file statuses up-front for all partitions. Beneficial on blob storage systems.
      final Map<String, FileStatus[]> fileStatusMap = new ConcurrentHashMap<String, FileStatus[]>();
      int poolSize = conf.getInt(ConfVars.HIVE_MOVE_FILES_THREAD_COUNT.varname, 1);
      // If the thread count is set to 0, fall back to a single thread.
      poolSize = Math.max(poolSize, 1);
      final ExecutorService pool = Executors.newFixedThreadPool(poolSize,
          new ThreadFactoryBuilder().setDaemon(true).setNameFormat("stats-updater-thread-%d").build());
      final List<Future<Void>> futures = Lists.newLinkedList();
      LOG.debug("Getting file stats of all partitions. Thread pool size: " + poolSize);
      try {
        for (final Partition partn : partitions) {
          final String partitionName = partn.getName();
          final org.apache.hadoop.hive.metastore.api.Partition tPart = partn.getTPartition();
          Map<String, String> parameters = tPart.getParameters();
          if (!existStats(parameters) && atomic) {
            continue;
          }
          futures.add(pool.submit(new Callable<Void>() {
            @Override
            public Void call() throws Exception {
              FileStatus[] partfileStatus = wh.getFileStatusesForSD(tPart.getSd());
              fileStatusMap.put(partitionName, partfileStatus);
              return null;
            }
          }));
        }
        pool.shutdown();
        for (Future<Void> future : futures) {
          future.get();
        }
      } catch (InterruptedException e) {
        LOG.debug("Cancelling " + futures.size() + " file stats lookup tasks");
        // Cancel the remaining futures
        for (Future<Void> future : futures) {
          future.cancel(true);
        }
        // Fail the query if the stats are supposed to be reliable
        if (work.isStatsReliable()) {
          ret = 1;
        }
      } finally {
        if (pool != null) {
          pool.shutdownNow();
        }
        LOG.debug("Finished getting file stats of all partitions");
      }
      for (Partition partn : partitions) {
        // Get the old partition stats
        org.apache.hadoop.hive.metastore.api.Partition tPart = partn.getTPartition();
        Map<String, String> parameters = tPart.getParameters();
        if (work.getTableSpecs() == null && AcidUtils.isAcidTable(table)) {
          StatsSetupConst.setBasicStatsState(parameters, StatsSetupConst.FALSE);
        } else if (work.getTableSpecs() != null
            || (work.getLoadTableDesc() != null && work.getLoadTableDesc().getReplace())
            || (work.getLoadFileDesc() != null && !work.getLoadFileDesc().getDestinationCreateTable().isEmpty())) {
          StatsSetupConst.setBasicStatsState(parameters, StatsSetupConst.TRUE);
        }
        // A partition was added to fileStatusMap only if its stats exist
        if (!fileStatusMap.containsKey(partn.getName())) {
          continue;
        }
        // E.g. if a file is being loaded, the old row count is no longer valid
        if (work.isClearAggregatorStats()) {
          // We choose to keep the invalid stats and only change the state setting.
          StatsSetupConst.setBasicStatsState(parameters, StatsSetupConst.FALSE);
        }
        updateQuickStats(parameters, fileStatusMap.get(partn.getName()));
        if (StatsSetupConst.areBasicStatsUptoDate(parameters)) {
          if (statsAggregator != null) {
            String prefix = getAggregationPrefix(table, partn);
            updateStats(statsAggregator, parameters, prefix, atomic);
          }
          if (!getWork().getNoStatsAggregator()) {
            environmentContext = new EnvironmentContext();
            environmentContext.putToProperties(StatsSetupConst.STATS_GENERATED, StatsSetupConst.TASK);
          }
        }
        updates.add(new Partition(table, tPart));
        if (conf.getBoolVar(ConfVars.TEZ_EXEC_SUMMARY)) {
          console.printInfo("Partition " + tableFullName + partn.getSpec() + " stats: [" + toString(parameters) + ']');
        }
        LOG.info("Partition " + tableFullName + partn.getSpec() + " stats: [" + toString(parameters) + ']');
      }
      if (!updates.isEmpty()) {
        db.alterPartitions(tableFullName, updates, environmentContext);
      }
    }
  } catch (Exception e) {
    console.printInfo("[Warning] Could not update stats.",
        "Failed with exception " + e.getMessage() + "\n" + StringUtils.stringifyException(e));
    // Fail the query if the stats are supposed to be reliable
    if (work.isStatsReliable()) {
      ret = 1;
    }
  } finally {
    if (statsAggregator != null) {
      statsAggregator.closeConnection(scc);
    }
  }
  // A return value of 0 indicates success; anything else indicates failure.
  return ret;
}
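The partitioned-table branch above fans file-status lookups out over a daemon thread pool and then joins on the futures, cancelling the rest on interruption. The same pattern, reduced to a self-contained sketch using the JDK concurrency types plus Guava's ThreadFactoryBuilder (ParallelLookup, lookupAll, and the per-key computation are hypothetical stand-ins, not Hive code):

import com.google.common.util.concurrent.ThreadFactoryBuilder;

import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;

public class ParallelLookup {

  // Fan out one task per key on a daemon pool, collect results in a concurrent
  // map, and cancel the remaining tasks if the calling thread is interrupted.
  static Map<String, Long> lookupAll(List<String> keys, int poolSize) throws ExecutionException {
    ExecutorService pool = Executors.newFixedThreadPool(Math.max(poolSize, 1),
        new ThreadFactoryBuilder().setDaemon(true).setNameFormat("lookup-thread-%d").build());
    Map<String, Long> results = new ConcurrentHashMap<>();
    List<Future<Void>> futures = new ArrayList<>();
    try {
      for (String key : keys) {
        futures.add(pool.submit(() -> {
          results.put(key, (long) key.length()); // stand-in for the expensive remote call
          return null;
        }));
      }
      pool.shutdown();
      for (Future<Void> future : futures) {
        future.get(); // surfaces task failures as ExecutionException
      }
    } catch (InterruptedException e) {
      for (Future<Void> future : futures) {
        future.cancel(true);
      }
      Thread.currentThread().interrupt(); // restore the interrupt flag
    } finally {
      pool.shutdownNow();
    }
    return results;
  }
}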
Use of org.apache.hadoop.hive.ql.metadata.HiveException in project hive by apache.
The class MoveTask, method moveFileInDfs.
private void moveFileInDfs(Path sourcePath, Path targetPath, FileSystem fs) throws HiveException, IOException {
  // If the source exists, rename it. Otherwise, create an empty directory.
  if (fs.exists(sourcePath)) {
    Path deletePath = null;
    // Create targetPath.getParent() if it does not exist
    if (HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_INSERT_INTO_MULTILEVEL_DIRS)) {
      deletePath = createTargetPath(targetPath, fs);
    }
    Hive.clearDestForSubDirSrc(conf, targetPath, sourcePath, false);
    if (!Hive.moveFile(conf, sourcePath, targetPath, true, false)) {
      try {
        if (deletePath != null) {
          fs.delete(deletePath, true);
        }
      } catch (IOException e) {
        LOG.info("Unable to delete the path created for facilitating rename: " + deletePath);
      }
      throw new HiveException("Unable to rename: " + sourcePath + " to: " + targetPath);
    }
  } else if (!fs.mkdirs(targetPath)) {
    throw new HiveException("Unable to make directory: " + targetPath);
  }
}
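Stripped of Hive's helpers (createTargetPath, clearDestForSubDirSrc, moveFile), the core semantics reduce to rename-if-present, otherwise create an empty target. A minimal sketch using only the Hadoop FileSystem API; moveOrCreate is a hypothetical name:

import java.io.IOException;

import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

// Rename the source when it exists; otherwise create an empty target directory.
static void moveOrCreate(FileSystem fs, Path source, Path target) throws IOException {
  if (fs.exists(source)) {
    if (!fs.rename(source, target)) {
      throw new IOException("Unable to rename: " + source + " to: " + target);
    }
  } else if (!fs.mkdirs(target)) {
    throw new IOException("Unable to make directory: " + target);
  }
}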
Use of org.apache.hadoop.hive.ql.metadata.HiveException in project hive by apache.
The class OperatorFactory, method getVectorOperator.
public static <T extends OperatorDesc> Operator<T> getVectorOperator(Class<? extends Operator<?>> opClass,
    CompilationOpContext cContext, T conf, VectorizationContext vContext) throws HiveException {
  try {
    VectorDesc vectorDesc = ((AbstractOperatorDesc) conf).getVectorDesc();
    vectorDesc.setVectorOp(opClass);
    Operator<T> op = (Operator<T>) opClass.getDeclaredConstructor(
        CompilationOpContext.class, VectorizationContext.class, OperatorDesc.class)
        .newInstance(cContext, vContext, conf);
    return op;
  } catch (Exception e) {
    e.printStackTrace();
    throw new HiveException(e);
  }
}
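The reflective construction here generalizes to any class: look the constructor up by its exact parameter types, then invoke it with matching arguments. A small sketch of that idea (Reflect and instantiate are hypothetical names); getVectorOperator wraps exactly this kind of failure in a HiveException:

import java.lang.reflect.Constructor;

public final class Reflect {

  // Find a constructor by its exact parameter types and invoke it with the
  // given arguments; any lookup or instantiation failure surfaces as a
  // ReflectiveOperationException for the caller to wrap.
  static <T> T instantiate(Class<T> cls, Class<?>[] parameterTypes, Object... args)
      throws ReflectiveOperationException {
    Constructor<T> ctor = cls.getDeclaredConstructor(parameterTypes);
    return ctor.newInstance(args);
  }
}

// Usage, e.g.: Reflect.instantiate(StringBuilder.class, new Class<?>[] { String.class }, "seed")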
Use of org.apache.hadoop.hive.ql.metadata.HiveException in project hive by apache.
The class Utilities, method renameOrMoveFiles.
/**
 * Rename src to dst, or, if dst already exists, move the files in src into dst. If a file with
 * the same name already exists there, the new file's name is appended with "_1", "_2", etc.
 *
 * @param fs
 *          the FileSystem on which src and dst reside
 * @param src
 *          the source directory
 * @param dst
 *          the target directory
 * @throws IOException
 * @throws HiveException
 */
public static void renameOrMoveFiles(FileSystem fs, Path src, Path dst) throws IOException, HiveException {
  if (!fs.exists(dst)) {
    if (!fs.rename(src, dst)) {
      throw new HiveException("Unable to move: " + src + " to: " + dst);
    }
  } else {
    // Move file by file
    FileStatus[] files = fs.listStatus(src);
    for (FileStatus file : files) {
      Path srcFilePath = file.getPath();
      String fileName = srcFilePath.getName();
      Path dstFilePath = new Path(dst, fileName);
      if (file.isDirectory()) {
        renameOrMoveFiles(fs, srcFilePath, dstFilePath);
      } else {
        if (fs.exists(dstFilePath)) {
          int suffix = 0;
          do {
            suffix++;
            dstFilePath = new Path(dst, fileName + "_" + suffix);
          } while (fs.exists(dstFilePath));
        }
        if (!fs.rename(srcFilePath, dstFilePath)) {
          throw new HiveException("Unable to move: " + srcFilePath + " to: " + dstFilePath);
        }
      }
    }
  }
}
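A hedged usage sketch: merging a staging directory into a destination on the local filesystem. The paths are placeholders, and the checked IOException/HiveException must be handled by the caller.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

// Files already present under /tmp/final keep their names; colliding names
// arriving from /tmp/staging get "_1", "_2", ... suffixes.
FileSystem fs = FileSystem.getLocal(new Configuration());
Utilities.renameOrMoveFiles(fs, new Path("/tmp/staging"), new Path("/tmp/final"));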