
Example 46 with HiveException

Use of org.apache.hadoop.hive.ql.metadata.HiveException in project hive by apache.

From the class StatsTask, method getContext:

private StatsCollectionContext getContext() throws HiveException {
    StatsCollectionContext scc = new StatsCollectionContext(conf);
    Task sourceTask = getWork().getSourceTask();
    if (sourceTask == null) {
        throw new HiveException(ErrorMsg.STATSAGGREGATOR_SOURCETASK_NULL.getErrorCodedMsg());
    }
    scc.setTask(sourceTask);
    scc.setStatsTmpDir(this.getWork().getStatsTmpDir());
    return scc;
}
Also used: StatsCollectionContext (org.apache.hadoop.hive.ql.stats.StatsCollectionContext), HiveException (org.apache.hadoop.hive.ql.metadata.HiveException)
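
For orientation, here is a minimal sketch of how such a context is typically handed to an aggregator. The connect(...) call and the FSStatsAggregator implementation are assumptions based on the StatsAggregator interface; only closeConnection(...) is visible in these examples.

private void readAggregatedStats() throws HiveException {
    StatsCollectionContext scc = getContext();            // throws HiveException when no source task is set
    StatsAggregator aggregator = new FSStatsAggregator(); // assumed implementation (org.apache.hadoop.hive.ql.stats.fs)
    try {
        if (!aggregator.connect(scc)) {                   // connect(...) assumed from the StatsAggregator interface
            throw new HiveException("could not connect to the stats source");
        }
        // ... read aggregated statistics through the aggregator ...
    } finally {
        aggregator.closeConnection(scc);                  // mirrors the finally block in Example 47
    }
}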

Example 47 with HiveException

Use of org.apache.hadoop.hive.ql.metadata.HiveException in project hive by apache.

From the class StatsTask, method aggregateStats:

private int aggregateStats(Hive db) {
    StatsAggregator statsAggregator = null;
    int ret = 0;
    StatsCollectionContext scc = null;
    EnvironmentContext environmentContext = null;
    try {
        // Stats setup:
        final Warehouse wh = new Warehouse(conf);
        if (!getWork().getNoStatsAggregator() && !getWork().isNoScanAnalyzeCommand()) {
            try {
                scc = getContext();
                statsAggregator = createStatsAggregator(scc, conf);
            } catch (HiveException e) {
                if (HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_STATS_RELIABLE)) {
                    throw e;
                }
                console.printError(ErrorMsg.STATS_SKIPPING_BY_ERROR.getErrorCodedMsg(e.toString()));
            }
        }
        List<Partition> partitions = getPartitionsList(db);
        boolean atomic = HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_STATS_ATOMIC);
        String tableFullName = table.getDbName() + "." + table.getTableName();
        if (partitions == null) {
            org.apache.hadoop.hive.metastore.api.Table tTable = table.getTTable();
            Map<String, String> parameters = tTable.getParameters();
            // An ACID table will not have accurate stats unless they are set through an ANALYZE command.
            if (work.getTableSpecs() == null && AcidUtils.isAcidTable(table)) {
                StatsSetupConst.setBasicStatsState(parameters, StatsSetupConst.FALSE);
            } else if (work.getTableSpecs() != null || (work.getLoadTableDesc() != null && work.getLoadTableDesc().getReplace()) || (work.getLoadFileDesc() != null && !work.getLoadFileDesc().getDestinationCreateTable().isEmpty())) {
                StatsSetupConst.setBasicStatsState(parameters, StatsSetupConst.TRUE);
            }
            // Non-partitioned table:
            if (!existStats(parameters) && atomic) {
                return 0;
            }
            // For example, if a file is being loaded, the old row count is no longer valid;
            if (work.isClearAggregatorStats()) {
                // we choose to keep the invalid stats and only change the setting.
                StatsSetupConst.setBasicStatsState(parameters, StatsSetupConst.FALSE);
            }
            updateQuickStats(wh, parameters, tTable.getSd());
            if (StatsSetupConst.areBasicStatsUptoDate(parameters)) {
                if (statsAggregator != null) {
                    String prefix = getAggregationPrefix(table, null);
                    updateStats(statsAggregator, parameters, prefix, atomic);
                }
                // write table stats to metastore
                if (!getWork().getNoStatsAggregator()) {
                    environmentContext = new EnvironmentContext();
                    environmentContext.putToProperties(StatsSetupConst.STATS_GENERATED, StatsSetupConst.TASK);
                }
            }
            getHive().alterTable(tableFullName, new Table(tTable), environmentContext);
            if (conf.getBoolVar(ConfVars.TEZ_EXEC_SUMMARY)) {
                console.printInfo("Table " + tableFullName + " stats: [" + toString(parameters) + ']');
            }
            LOG.info("Table " + tableFullName + " stats: [" + toString(parameters) + ']');
        } else {
            // Partitioned table:
            // Need to get the old stats of the partition
            // and update the table stats based on the old and new stats.
            List<Partition> updates = new ArrayList<Partition>();
            // Get the file statuses up-front for all partitions; beneficial on blob storage systems
            final Map<String, FileStatus[]> fileStatusMap = new ConcurrentHashMap<String, FileStatus[]>();
            int poolSize = conf.getInt(ConfVars.HIVE_MOVE_FILES_THREAD_COUNT.varname, 1);
            // If the thread count is configured as 0, fall back to a single thread.
            poolSize = Math.max(poolSize, 1);
            final ExecutorService pool = Executors.newFixedThreadPool(poolSize, new ThreadFactoryBuilder().setDaemon(true).setNameFormat("stats-updater-thread-%d").build());
            final List<Future<Void>> futures = Lists.newLinkedList();
            LOG.debug("Getting file stats of all partitions. threadpool size:" + poolSize);
            try {
                for (final Partition partn : partitions) {
                    final String partitionName = partn.getName();
                    final org.apache.hadoop.hive.metastore.api.Partition tPart = partn.getTPartition();
                    Map<String, String> parameters = tPart.getParameters();
                    if (!existStats(parameters) && atomic) {
                        continue;
                    }
                    futures.add(pool.submit(new Callable<Void>() {

                        @Override
                        public Void call() throws Exception {
                            FileStatus[] partfileStatus = wh.getFileStatusesForSD(tPart.getSd());
                            fileStatusMap.put(partitionName, partfileStatus);
                            return null;
                        }
                    }));
                }
                pool.shutdown();
                for (Future<Void> future : futures) {
                    future.get();
                }
            } catch (InterruptedException e) {
                LOG.debug("Cancelling " + futures.size() + " file stats lookup tasks");
                // Cancel the remaining futures
                for (Future future : futures) {
                    future.cancel(true);
                }
                // Fail the query if the stats are supposed to be reliable
                if (work.isStatsReliable()) {
                    ret = 1;
                }
            } finally {
                if (pool != null) {
                    pool.shutdownNow();
                }
                LOG.debug("Finished getting file stats of all partitions");
            }
            for (Partition partn : partitions) {
                //
                // get the old partition stats
                //
                org.apache.hadoop.hive.metastore.api.Partition tPart = partn.getTPartition();
                Map<String, String> parameters = tPart.getParameters();
                if (work.getTableSpecs() == null && AcidUtils.isAcidTable(table)) {
                    StatsSetupConst.setBasicStatsState(parameters, StatsSetupConst.FALSE);
                } else if (work.getTableSpecs() != null || (work.getLoadTableDesc() != null && work.getLoadTableDesc().getReplace()) || (work.getLoadFileDesc() != null && !work.getLoadFileDesc().getDestinationCreateTable().isEmpty())) {
                    StatsSetupConst.setBasicStatsState(parameters, StatsSetupConst.TRUE);
                }
                // A partition was added to fileStatusMap only if its stats existed; skip the rest.
                if (!fileStatusMap.containsKey(partn.getName())) {
                    continue;
                }
                // For example, if a file is being loaded, the old row count is no longer valid;
                if (work.isClearAggregatorStats()) {
                    // we choose to keep the invalid stats and only change the setting.
                    StatsSetupConst.setBasicStatsState(parameters, StatsSetupConst.FALSE);
                }
                updateQuickStats(parameters, fileStatusMap.get(partn.getName()));
                if (StatsSetupConst.areBasicStatsUptoDate(parameters)) {
                    if (statsAggregator != null) {
                        String prefix = getAggregationPrefix(table, partn);
                        updateStats(statsAggregator, parameters, prefix, atomic);
                    }
                    if (!getWork().getNoStatsAggregator()) {
                        environmentContext = new EnvironmentContext();
                        environmentContext.putToProperties(StatsSetupConst.STATS_GENERATED, StatsSetupConst.TASK);
                    }
                }
                updates.add(new Partition(table, tPart));
                if (conf.getBoolVar(ConfVars.TEZ_EXEC_SUMMARY)) {
                    console.printInfo("Partition " + tableFullName + partn.getSpec() + " stats: [" + toString(parameters) + ']');
                }
                LOG.info("Partition " + tableFullName + partn.getSpec() + " stats: [" + toString(parameters) + ']');
            }
            if (!updates.isEmpty()) {
                db.alterPartitions(tableFullName, updates, environmentContext);
            }
        }
    } catch (Exception e) {
        console.printInfo("[Warning] could not update stats.", "Failed with exception " + e.getMessage() + "\n" + StringUtils.stringifyException(e));
        // Fail the query if the stats are supposed to be reliable
        if (work.isStatsReliable()) {
            ret = 1;
        }
    } finally {
        if (statsAggregator != null) {
            statsAggregator.closeConnection(scc);
        }
    }
    // ret == 0 indicates success; any other value indicates failure
    return ret;
}
Also used: Warehouse (org.apache.hadoop.hive.metastore.Warehouse), HiveException (org.apache.hadoop.hive.ql.metadata.HiveException), FileStatus (org.apache.hadoop.fs.FileStatus), ArrayList (java.util.ArrayList), Callable (java.util.concurrent.Callable), EnvironmentContext (org.apache.hadoop.hive.metastore.api.EnvironmentContext), ThreadFactoryBuilder (com.google.common.util.concurrent.ThreadFactoryBuilder), ConcurrentHashMap (java.util.concurrent.ConcurrentHashMap), StatsCollectionContext (org.apache.hadoop.hive.ql.stats.StatsCollectionContext), Partition (org.apache.hadoop.hive.ql.metadata.Partition), Table (org.apache.hadoop.hive.ql.metadata.Table), StatsAggregator (org.apache.hadoop.hive.ql.stats.StatsAggregator), MetaException (org.apache.hadoop.hive.metastore.api.MetaException), ExecutorService (java.util.concurrent.ExecutorService), Future (java.util.concurrent.Future)
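
The file-status loop above is a reusable pattern: a bounded daemon pool, one Callable per item, a blocking drain of the futures, and prompt cancellation on interrupt. A self-contained sketch of just that pattern (assuming java.util, java.util.concurrent, and Guava's ThreadFactoryBuilder), with an illustrative expensiveLookup standing in for wh.getFileStatusesForSD(...):

private static Map<String, Long> lookupAll(List<String> keys, int configuredThreads) throws Exception {
    // Treat a configured thread count of 0 as "single thread", as the Hive code does.
    int poolSize = Math.max(configuredThreads, 1);
    final ExecutorService pool = Executors.newFixedThreadPool(poolSize,
        new ThreadFactoryBuilder().setDaemon(true).setNameFormat("lookup-thread-%d").build());
    final Map<String, Long> results = new ConcurrentHashMap<String, Long>();
    final List<Future<Void>> futures = new LinkedList<Future<Void>>();
    try {
        for (final String key : keys) {
            futures.add(pool.submit(new Callable<Void>() {

                @Override
                public Void call() throws Exception {
                    results.put(key, expensiveLookup(key)); // one remote call per item
                    return null;
                }
            }));
        }
        pool.shutdown();
        for (Future<Void> future : futures) {
            future.get(); // blocks; rethrows the first task failure as an ExecutionException
        }
        return results;
    } catch (InterruptedException e) {
        for (Future<Void> future : futures) {
            future.cancel(true); // stop the remaining lookups promptly
        }
        throw e;
    } finally {
        pool.shutdownNow();
    }
}

private static long expensiveLookup(String key) {
    return key.length(); // stand-in for a filesystem or metastore call
}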

Example 48 with HiveException

Use of org.apache.hadoop.hive.ql.metadata.HiveException in project hive by apache.

From the class MoveTask, method moveFileInDfs:

private void moveFileInDfs(Path sourcePath, Path targetPath, FileSystem fs) throws HiveException, IOException {
    // If the source exists, rename it; otherwise, create an empty target directory.
    if (fs.exists(sourcePath)) {
        Path deletePath = null;
        // Create targetPath.getParent() if it does not exist
        if (HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_INSERT_INTO_MULTILEVEL_DIRS)) {
            deletePath = createTargetPath(targetPath, fs);
        }
        Hive.clearDestForSubDirSrc(conf, targetPath, sourcePath, false);
        if (!Hive.moveFile(conf, sourcePath, targetPath, true, false)) {
            try {
                if (deletePath != null) {
                    fs.delete(deletePath, true);
                }
            } catch (IOException e) {
                LOG.info("Unable to delete the path created for facilitating rename" + deletePath);
            }
            throw new HiveException("Unable to rename: " + sourcePath + " to: " + targetPath);
        }
    } else if (!fs.mkdirs(targetPath)) {
        throw new HiveException("Unable to make directory: " + targetPath);
    }
}
Also used: Path (org.apache.hadoop.fs.Path), HiveException (org.apache.hadoop.hive.ql.metadata.HiveException), IOException (java.io.IOException)
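
The deletePath bookkeeping exists so that a parent directory created only to make the rename possible can be removed again when the rename fails. A condensed sketch of that rollback pattern against the Hadoop FileSystem API; moveWithRollback is illustrative, and unlike Hive's createTargetPath it records just the immediate parent rather than the first missing ancestor:

private static void moveWithRollback(FileSystem fs, Path sourcePath, Path targetPath)
        throws HiveException, IOException {
    Path createdParent = null;
    Path parent = targetPath.getParent();
    if (parent != null && !fs.exists(parent)) {
        fs.mkdirs(parent);       // creates all missing levels in one call
        createdParent = parent;  // remember it so a failed rename can be rolled back
    }
    if (!fs.rename(sourcePath, targetPath)) {
        if (createdParent != null) {
            try {
                fs.delete(createdParent, true);
            } catch (IOException e) {
                // Best effort only; do not let cleanup mask the real failure.
            }
        }
        throw new HiveException("Unable to rename: " + sourcePath + " to: " + targetPath);
    }
}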

Example 49 with HiveException

Use of org.apache.hadoop.hive.ql.metadata.HiveException in project hive by apache.

From the class OperatorFactory, method getVectorOperator:

public static <T extends OperatorDesc> Operator<T> getVectorOperator(Class<? extends Operator<?>> opClass, CompilationOpContext cContext, T conf, VectorizationContext vContext) throws HiveException {
    try {
        VectorDesc vectorDesc = ((AbstractOperatorDesc) conf).getVectorDesc();
        vectorDesc.setVectorOp(opClass);
        Operator<T> op = (Operator<T>) opClass.getDeclaredConstructor(CompilationOpContext.class, VectorizationContext.class, OperatorDesc.class).newInstance(cContext, vContext, conf);
        return op;
    } catch (Exception e) {
        e.printStackTrace();
        throw new HiveException(e);
    }
}
Also used: SparkPartitionPruningSinkOperator (org.apache.hadoop.hive.ql.parse.spark.SparkPartitionPruningSinkOperator), VectorSparkPartitionPruningSinkOperator (org.apache.hadoop.hive.ql.exec.vector.VectorSparkPartitionPruningSinkOperator), VectorFilterOperator (org.apache.hadoop.hive.ql.exec.vector.VectorFilterOperator), VectorReduceSinkOperator (org.apache.hadoop.hive.ql.exec.vector.VectorReduceSinkOperator), VectorGroupByOperator (org.apache.hadoop.hive.ql.exec.vector.VectorGroupByOperator), VectorMapJoinOperator (org.apache.hadoop.hive.ql.exec.vector.VectorMapJoinOperator), VectorAppMasterEventOperator (org.apache.hadoop.hive.ql.exec.vector.VectorAppMasterEventOperator), VectorSelectOperator (org.apache.hadoop.hive.ql.exec.vector.VectorSelectOperator), VectorSMBMapJoinOperator (org.apache.hadoop.hive.ql.exec.vector.VectorSMBMapJoinOperator), VectorFileSinkOperator (org.apache.hadoop.hive.ql.exec.vector.VectorFileSinkOperator), VectorSparkHashTableSinkOperator (org.apache.hadoop.hive.ql.exec.vector.VectorSparkHashTableSinkOperator), VectorLimitOperator (org.apache.hadoop.hive.ql.exec.vector.VectorLimitOperator), AbstractOperatorDesc (org.apache.hadoop.hive.ql.plan.AbstractOperatorDesc), HiveException (org.apache.hadoop.hive.ql.metadata.HiveException), AbstractVectorDesc (org.apache.hadoop.hive.ql.plan.AbstractVectorDesc), VectorDesc (org.apache.hadoop.hive.ql.plan.VectorDesc)
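
The essential move in this factory is resolving the constructor by its declared parameter types (CompilationOpContext.class, VectorizationContext.class, OperatorDesc.class) rather than by the runtime classes of the arguments: conf is typically a subclass of OperatorDesc, so conf.getClass() would not match any declared constructor. A generic sketch of that technique; instantiate is illustrative, not a Hive helper:

private static <T> T instantiate(Class<? extends T> cls, Class<?>[] parameterTypes, Object... args)
        throws ReflectiveOperationException {
    // Class.newInstance() only handles no-arg constructors, and looking up by
    // args[i].getClass() would miss constructors declared against supertypes,
    // so the caller must supply the declared parameter types explicitly.
    return cls.getDeclaredConstructor(parameterTypes).newInstance(args);
}

// Usage, mirroring the factory above:
// Operator<?> op = instantiate(opClass,
//     new Class<?>[] { CompilationOpContext.class, VectorizationContext.class, OperatorDesc.class },
//     cContext, vContext, conf);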

Example 50 with HiveException

Use of org.apache.hadoop.hive.ql.metadata.HiveException in project hive by apache.

From the class Utilities, method renameOrMoveFiles:

/**
   * Rename src to dst or, if dst already exists, move the files in src into dst. If a file with
   * the same name already exists, a suffix "_1", "_2", etc. is appended to the incoming file's name.
   *
   * @param fs
   *          the FileSystem where src and dst are on.
   * @param src
   *          the src directory
   * @param dst
   *          the target directory
   * @throws IOException
   * @throws HiveException if a rename fails
   */
public static void renameOrMoveFiles(FileSystem fs, Path src, Path dst) throws IOException, HiveException {
    if (!fs.exists(dst)) {
        if (!fs.rename(src, dst)) {
            throw new HiveException("Unable to move: " + src + " to: " + dst);
        }
    } else {
        // move file by file
        FileStatus[] files = fs.listStatus(src);
        for (FileStatus file : files) {
            Path srcFilePath = file.getPath();
            String fileName = srcFilePath.getName();
            Path dstFilePath = new Path(dst, fileName);
            if (file.isDir()) {
                renameOrMoveFiles(fs, srcFilePath, dstFilePath);
            } else {
                if (fs.exists(dstFilePath)) {
                    int suffix = 0;
                    do {
                        suffix++;
                        dstFilePath = new Path(dst, fileName + "_" + suffix);
                    } while (fs.exists(dstFilePath));
                }
                if (!fs.rename(srcFilePath, dstFilePath)) {
                    throw new HiveException("Unable to move: " + src + " to: " + dst);
                }
            }
        }
    }
}
Also used: Path (org.apache.hadoop.fs.Path), HiveException (org.apache.hadoop.hive.ql.metadata.HiveException), FileStatus (org.apache.hadoop.fs.FileStatus)
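
A hedged usage sketch against the local filesystem, showing the suffix behavior when both directories already contain a file of the same name. Paths are illustrative, and the usual Hadoop imports (Configuration, FileSystem, Path) are assumed:

Configuration conf = new Configuration();
FileSystem fs = FileSystem.getLocal(conf);
Path src = new Path("/tmp/demo/src");
Path dst = new Path("/tmp/demo/dst");
fs.mkdirs(src);
fs.mkdirs(dst);
fs.create(new Path(src, "part-00000")).close(); // same file name on both sides
fs.create(new Path(dst, "part-00000")).close();
Utilities.renameOrMoveFiles(fs, src, dst);
// dst now holds part-00000 and part-00000_1; nothing was overwritten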

Aggregations

HiveException (org.apache.hadoop.hive.ql.metadata.HiveException): 451
IOException (java.io.IOException): 172
ArrayList (java.util.ArrayList): 81
Path (org.apache.hadoop.fs.Path): 68
Table (org.apache.hadoop.hive.ql.metadata.Table): 65
SemanticException (org.apache.hadoop.hive.ql.parse.SemanticException): 46
SerDeException (org.apache.hadoop.hive.serde2.SerDeException): 45
ObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector): 45
MetaException (org.apache.hadoop.hive.metastore.api.MetaException): 42
Partition (org.apache.hadoop.hive.ql.metadata.Partition): 39
FileSystem (org.apache.hadoop.fs.FileSystem): 31
ExprNodeDesc (org.apache.hadoop.hive.ql.plan.ExprNodeDesc): 29
LinkedHashMap (java.util.LinkedHashMap): 28
FieldSchema (org.apache.hadoop.hive.metastore.api.FieldSchema): 28
InvalidTableException (org.apache.hadoop.hive.ql.metadata.InvalidTableException): 28
FileNotFoundException (java.io.FileNotFoundException): 27
URISyntaxException (java.net.URISyntaxException): 27
HashMap (java.util.HashMap): 26
InvalidOperationException (org.apache.hadoop.hive.metastore.api.InvalidOperationException): 23
StructObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector): 23