Search in sources :

Example 1 with Partition

use of org.apache.hadoop.hive.ql.metadata.Partition in project hive by apache.

the class StatsUtils method getFileSizeForPartitions.

  /**
   * Find the bytes on disk occupied by a list of partitions.
   * @param conf
   *          - hive conf
   * @param parts
   *          - partition list
   * @return sizes of partitions
   */
public static List<Long> getFileSizeForPartitions(final HiveConf conf, List<Partition> parts) {"Number of partitions : " + parts.size());
    // NOTE(review): this listing appears truncated by extraction. The line above has lost the
    // call that consumed the "Number of partitions" message (presumably a log call - confirm),
    // and several closing braces plus the body of the finally block are missing below.
    // One pending result per partition, in the same order as parts.
    ArrayList<Future<Long>> futures = new ArrayList<>();
    // Always use at least one worker thread, even if the configured count is zero or negative.
    int threads = Math.max(1, conf.getIntVar(ConfVars.METASTORE_FS_HANDLER_THREADS_COUNT));
    // Fixed-size pool of daemon threads named "Get-Partitions-Size-<n>".
    final ExecutorService pool = Executors.newFixedThreadPool(threads, new ThreadFactoryBuilder().setDaemon(true).setNameFormat("Get-Partitions-Size-%d").build());
    // Pre-sized for one entry per partition.
    final ArrayList<Long> sizes = new ArrayList<>(parts.size());
    for (final Partition part : parts) {
        final Path path = part.getDataLocation();
        // Submit one size lookup per partition to the pool.
        futures.add(pool.submit(new Callable<Long>() {

            public Long call() throws Exception {
                try {
                    LOG.debug("Partition path : " + path);
                    FileSystem fs = path.getFileSystem(conf);
                    // Length taken from the content summary the file system reports for this path.
                    return fs.getContentSummary(path).getLength();
                } catch (IOException e) {
                    // A partition whose size cannot be read is reported as 0; the exception is not logged.
                    return 0L;
    try {
        // Collect results in submission order; get() waits for each task to complete.
        for (int i = 0; i < futures.size(); i++) {
            sizes.add(i, futures.get(i).get());
    } catch (InterruptedException | ExecutionException e) {
        // Collection stops at the first failure, so sizes may hold fewer entries than parts.
        LOG.warn("Exception in processing files ", e);
    } finally {
        // NOTE(review): the finally body is not visible here; presumably it shuts the pool down - confirm.
    return sizes;
Also used : Path(org.apache.hadoop.fs.Path) Partition(org.apache.hadoop.hive.ql.metadata.Partition) ArrayList(java.util.ArrayList) IOException(java.io.IOException) Callable(java.util.concurrent.Callable) FileSystem(org.apache.hadoop.fs.FileSystem) ExecutorService(java.util.concurrent.ExecutorService) Future(java.util.concurrent.Future) ThreadFactoryBuilder(com.google.common.util.concurrent.ThreadFactoryBuilder) ExecutionException(java.util.concurrent.ExecutionException)

Example 2 with Partition

use of org.apache.hadoop.hive.ql.metadata.Partition in project hive by apache.

the class StatsUtils method getRangePartitionColumn.

private static Range getRangePartitionColumn(PartitionIterable partitions, String partColName, String colType, String defaultPartName) {
    Range range = null;
    String partVal;
    String colTypeLowerCase = colType.toLowerCase();
    if (colTypeLowerCase.equals(serdeConstants.TINYINT_TYPE_NAME) || colTypeLowerCase.equals(serdeConstants.SMALLINT_TYPE_NAME) || colTypeLowerCase.equals(serdeConstants.INT_TYPE_NAME) || colTypeLowerCase.equals(serdeConstants.BIGINT_TYPE_NAME)) {
        long min = Long.MAX_VALUE;
        long max = Long.MIN_VALUE;
        for (Partition partition : partitions) {
            partVal = partition.getSpec().get(partColName);
            if (partVal.equals(defaultPartName)) {
                // partition column value is null.
            } else {
                long value = Long.parseLong(partVal);
                min = Math.min(min, value);
                max = Math.max(max, value);
        range = new Range(min, max);
    } else if (colTypeLowerCase.equals(serdeConstants.FLOAT_TYPE_NAME) || colTypeLowerCase.equals(serdeConstants.DOUBLE_TYPE_NAME)) {
        double min = Double.MAX_VALUE;
        double max = Double.MIN_VALUE;
        for (Partition partition : partitions) {
            partVal = partition.getSpec().get(partColName);
            if (partVal.equals(defaultPartName)) {
                // partition column value is null.
            } else {
                double value = Double.parseDouble(partVal);
                min = Math.min(min, value);
                max = Math.max(max, value);
        range = new Range(min, max);
    } else if (colTypeLowerCase.startsWith(serdeConstants.DECIMAL_TYPE_NAME)) {
        double min = Double.MAX_VALUE;
        double max = Double.MIN_VALUE;
        for (Partition partition : partitions) {
            partVal = partition.getSpec().get(partColName);
            if (partVal.equals(defaultPartName)) {
                // partition column value is null.
            } else {
                double value = new BigDecimal(partVal).doubleValue();
                min = Math.min(min, value);
                max = Math.max(max, value);
        range = new Range(min, max);
    } else {
        // Columns statistics for complex datatypes are not supported yet
        return null;
    return range;
Also used : Partition(org.apache.hadoop.hive.ql.metadata.Partition) PartitionIterable(org.apache.hadoop.hive.ql.metadata.PartitionIterable) Range(org.apache.hadoop.hive.ql.plan.ColStatistics.Range) BigDecimal(java.math.BigDecimal)

Example 3 with Partition

use of org.apache.hadoop.hive.ql.metadata.Partition in project hive by apache.

the class DDLTask method alterTableAlterPart.

  /**
   * Alter partition column type in a table.
   * @param db
   *          Database to rename the partition.
   * @param alterPartitionDesc
   *          change partition column type.
   * @return Returns 0 when execution succeeds and above 0 if it fails.
   * @throws HiveException
   */
private int alterTableAlterPart(Hive db, AlterTableAlterPartDesc alterPartitionDesc) throws HiveException {
    // NOTE(review): this listing appears truncated by extraction - braces are unbalanced and
    // some statements (e.g. whatever updates colIndex or fills newPartitionKeys) are not visible.
    // The comments below describe only the statements that are shown.
    Table tbl = db.getTable(alterPartitionDesc.getTableName(), true);
    String tabName = alterPartitionDesc.getTableName();
    // This is checked by DDLSemanticAnalyzer
    assert (tbl.isPartitioned());
    // NOTE(review): nothing visible here adds to this list; presumably it receives the updated partition keys - confirm.
    List<FieldSchema> newPartitionKeys = new ArrayList<FieldSchema>();
    // Check that every existing partition value can be converted to the new type
    // with a non null value before trying to alter the partition column type.
    try {
        Set<Partition> partitions = db.getAllPartitionsOf(tbl);
        // Position of the column being altered among the partition keys; -1 means not found.
        int colIndex = -1;
        for (FieldSchema col : tbl.getTTable().getPartitionKeys()) {
            // Match on the column name given in the partition key spec.
            if (col.getName().compareTo(alterPartitionDesc.getPartKeySpec().getName()) == 0) {
        // Reject the request when no partition key matched the requested column name.
        if (colIndex == -1 || colIndex == tbl.getTTable().getPartitionKeys().size()) {
            throw new HiveException("Cannot find partition column " + alterPartitionDesc.getPartKeySpec().getName());
        // Build a converter from Java strings to the writable object inspector of the requested type.
        TypeInfo expectedType = TypeInfoUtils.getTypeInfoFromTypeString(alterPartitionDesc.getPartKeySpec().getType());
        ObjectInspector outputOI = TypeInfoUtils.getStandardWritableObjectInspectorFromTypeInfo(expectedType);
        Converter converter = ObjectInspectorConverters.getConverter(PrimitiveObjectInspectorFactory.javaStringObjectInspector, outputOI);
        // For all the existing partitions, check if the value can be type casted to a non-null object
        for (Partition part : partitions) {
            // NOTE(review): the body of this default-partition check is not visible; presumably such partitions are skipped - confirm.
            if (part.getName().equals(conf.getVar(HiveConf.ConfVars.DEFAULTPARTITIONNAME))) {
            try {
                String value = part.getValues().get(colIndex);
                Object convertedValue = converter.convert(value);
                // A null conversion result is treated as a failed conversion.
                if (convertedValue == null) {
                    throw new HiveException(" Converting from " + TypeInfoFactory.stringTypeInfo + " to " + expectedType + " for value : " + value + " resulted in NULL object");
            } catch (Exception e) {
                // The new exception carries only a message; the caught exception is not attached as its cause.
                throw new HiveException("Exception while converting " + TypeInfoFactory.stringTypeInfo + " to " + expectedType + " for value : " + part.getValues().get(colIndex));
    } catch (Exception e) {
        // Any failure during the check is reported as a HiveException built from the message only.
        throw new HiveException("Exception while checking type conversion of existing partition values to " + alterPartitionDesc.getPartKeySpec() + " : " + e.getMessage());
    // NOTE(review): both branches of this name comparison are empty in this listing.
    for (FieldSchema col : tbl.getTTable().getPartitionKeys()) {
        if (col.getName().compareTo(alterPartitionDesc.getPartKeySpec().getName()) == 0) {
        } else {
    try {
        // Persist the table definition through the metastore client.
        db.alterTable(tabName, tbl, null);
    } catch (InvalidOperationException e) {
        throw new HiveException(e, ErrorMsg.GENERIC_ERROR, "Unable to alter " + tabName);
    // Register the table as an input of this work item.
    work.getInputs().add(new ReadEntity(tbl));
    // We've already locked the table as the input, don't relock it as the output.
    addIfAbsentByName(new WriteEntity(tbl, WriteEntity.WriteType.DDL_NO_LOCK));
    return 0;
Also used : Partition(org.apache.hadoop.hive.ql.metadata.Partition) AlterTableExchangePartition(org.apache.hadoop.hive.ql.plan.AlterTableExchangePartition) ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) Table(org.apache.hadoop.hive.ql.metadata.Table) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) FieldSchema(org.apache.hadoop.hive.metastore.api.FieldSchema) ArrayList(java.util.ArrayList) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) DecimalTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo) AlreadyExistsException(org.apache.hadoop.hive.metastore.api.AlreadyExistsException) InvalidOperationException(org.apache.hadoop.hive.metastore.api.InvalidOperationException) IOException(java.io.IOException) NoSuchObjectException(org.apache.hadoop.hive.metastore.api.NoSuchObjectException) MetaException(org.apache.hadoop.hive.metastore.api.MetaException) URISyntaxException(java.net.URISyntaxException) SemanticException(org.apache.hadoop.hive.ql.parse.SemanticException) StringUtils.stringifyException(org.apache.hadoop.util.StringUtils.stringifyException) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) SQLException(java.sql.SQLException) FileNotFoundException(java.io.FileNotFoundException) HiveAuthzPluginException(org.apache.hadoop.hive.ql.security.authorization.plugin.HiveAuthzPluginException) InvalidTableException(org.apache.hadoop.hive.ql.metadata.InvalidTableException) ReadEntity(org.apache.hadoop.hive.ql.hooks.ReadEntity) InvalidOperationException(org.apache.hadoop.hive.metastore.api.InvalidOperationException) StatObjectConverter(org.apache.hadoop.hive.metastore.StatObjectConverter) Converter(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters.Converter) HivePrivilegeObject(org.apache.hadoop.hive.ql.security.authorization.plugin.HivePrivilegeObject) HiveLockObject(org.apache.hadoop.hive.ql.lockmgr.HiveLockObject) WriteEntity(org.apache.hadoop.hive.ql.hooks.WriteEntity)

Example 4 with Partition

use of org.apache.hadoop.hive.ql.metadata.Partition in project hive by apache.

the class DDLTask method archive.

// Archives the partition(s) matching a partition spec into a Hadoop archive named "data.har",
// swaps it into the partition's original location via intermediate directories, and records
// the archived state in the metastore. Returns 0 when the visible code path completes.
// NOTE(review): this listing appears truncated by extraction - braces are unbalanced and some
// statements are missing (for example the call on the `ret =` line and the code that fills
// `args` and `authority`). The comments below describe only what is visible.
private int archive(Hive db, AlterTableSimpleDesc simpleDesc, DriverContext driverContext) throws HiveException {
    Table tbl = db.getTable(simpleDesc.getTableName());
    // Only managed tables may be archived.
    if (tbl.getTableType() != TableType.MANAGED_TABLE) {
        throw new HiveException("ARCHIVE can only be performed on managed tables");
    Map<String, String> partSpec = simpleDesc.getPartSpec();
    PartSpecInfo partSpecInfo = PartSpecInfo.create(tbl, partSpec);
    List<Partition> partitions = db.getPartitions(tbl, partSpec);
    // Directory whose contents get archived; chosen below depending on how complete the spec is.
    Path originalDir = null;
    // to keep backward compatibility
    if (partitions.isEmpty()) {
        throw new HiveException("No partition matches the specification");
    } else if (partSpecInfo.values.size() != tbl.getPartCols().size()) {
        // for partial specifications we need partitions to follow the scheme
        for (Partition p : partitions) {
            if (partitionInCustomLocation(tbl, p)) {
                String message = String.format("ARCHIVE cannot run for partition " + "groups with custom locations like %s", p.getLocation());
                throw new HiveException(message);
        originalDir = partSpecInfo.createPath(tbl);
    } else {
        // Full specification: the first matching partition determines the directory.
        Partition p = partitions.get(0);
        // partition can be archived if during recovery
        if (ArchiveUtils.isArchived(p)) {
            originalDir = new Path(getOriginalLocation(p));
        } else {
            originalDir = p.getDataLocation();
    // Sibling directories of originalDir used as staging areas for the archived and original data.
    Path intermediateArchivedDir = new Path(originalDir.getParent(), originalDir.getName() + INTERMEDIATE_ARCHIVED_DIR_SUFFIX);
    Path intermediateOriginalDir = new Path(originalDir.getParent(), originalDir.getName() + INTERMEDIATE_ORIGINAL_DIR_SUFFIX);
    console.printInfo("intermediate.archived is " + intermediateArchivedDir.toString());
    console.printInfo("intermediate.original is " + intermediateOriginalDir.toString());
    String archiveName = "data.har";
    FileSystem fs = null;
    try {
        fs = originalDir.getFileSystem(conf);
    } catch (IOException e) {
        throw new HiveException(e);
    // URIs handed to the helper that later maps partition locations to locations inside the archive.
    URI archiveUri = (new Path(originalDir, archiveName)).toUri();
    URI originalUri = ArchiveUtils.addSlash(originalDir.toUri());
    ArchiveUtils.HarPathHelper harHelper = new ArchiveUtils.HarPathHelper(conf, archiveUri, originalUri);
    // if they are different, we throw an error
    for (Partition p : partitions) {
        if (ArchiveUtils.isArchived(p)) {
            // An existing archive at a different level conflicts; one at the same level means nothing is left to do.
            if (ArchiveUtils.getArchivingLevel(p) != partSpecInfo.values.size()) {
                String name = ArchiveUtils.getPartialName(p, ArchiveUtils.getArchivingLevel(p));
                String m = String.format("Conflict with existing archive %s", name);
                throw new HiveException(m);
            } else {
                throw new HiveException("Partition(s) already archived");
    // Either intermediate directory existing means an earlier ARCHIVE run did not finish.
    boolean recovery = false;
    if (pathExists(intermediateArchivedDir) || pathExists(intermediateOriginalDir)) {
        recovery = true;
        console.printInfo("Starting recovery after failed ARCHIVE");
    // to use as the move operation that created it is atomic.
    if (!pathExists(intermediateArchivedDir) && !pathExists(intermediateOriginalDir)) {
        // First create the archive in a tmp dir so that if the job fails, the
        // bad files don't pollute the filesystem
        Path tmpPath = new Path(driverContext.getCtx().getExternalTmpPath(originalDir), "partlevel");
        console.printInfo("Creating " + archiveName + " for " + originalDir.toString());
        console.printInfo("in " + tmpPath);
        console.printInfo("Please wait... (this may take a while)");
        // Create the Hadoop archive
        int ret = 0;
        try {
            // Build a job name and shorten it to fit the configured maximum length.
            int maxJobNameLen = conf.getIntVar(HiveConf.ConfVars.HIVEJOBNAMELENGTH);
            String jobname = String.format("Archiving %s@%s", tbl.getTableName(), partSpecInfo.getName());
            jobname = Utilities.abbreviate(jobname, maxJobNameLen - 6);
            conf.set(MRJobConfig.JOB_NAME, jobname);
            HadoopArchives har = new HadoopArchives(conf);
            // NOTE(review): the statements that add arguments to this list are not visible here.
            List<String> args = new ArrayList<String>();
            // NOTE(review): the call on the next line was cut by extraction; presumably it runs the archive tool with args - confirm.
            ret =, args.toArray(new String[0]));
        } catch (Exception e) {
            throw new HiveException(e);
        // A non-zero result from the archive run is treated as failure.
        if (ret != 0) {
            throw new HiveException("Error while creating HAR");
        // the partition directory. e.g. .../hr=12-intermediate-archived
        try {
            console.printInfo("Moving " + tmpPath + " to " + intermediateArchivedDir);
            if (pathExists(intermediateArchivedDir)) {
                throw new HiveException("The intermediate archive directory already exists.");
            fs.rename(tmpPath, intermediateArchivedDir);
        } catch (IOException e) {
            // The caught IOException is not attached as the cause of the new exception.
            throw new HiveException("Error while moving tmp directory");
    } else {
        if (pathExists(intermediateArchivedDir)) {
            console.printInfo("Intermediate archive directory " + intermediateArchivedDir + " already exists. Assuming it contains an archived version of the partition");
    // if the move hasn't been made already
    if (!pathExists(intermediateOriginalDir)) {
        console.printInfo("Moving " + originalDir + " to " + intermediateOriginalDir);
        moveDir(fs, originalDir, intermediateOriginalDir);
    } else {
        console.printInfo(intermediateOriginalDir + " already exists. " + "Assuming it contains the original files in the partition");
    // Move the intermediate archived directory to the original parent directory
    if (!pathExists(originalDir)) {
        console.printInfo("Moving " + intermediateArchivedDir + " to " + originalDir);
        moveDir(fs, intermediateArchivedDir, originalDir);
    } else {
        console.printInfo(originalDir + " already exists. " + "Assuming it contains the archived version of the partition");
    // Record this change in the metastore
    try {
        for (Partition p : partitions) {
            // Map the partition's location to the corresponding URI inside the archive.
            URI originalPartitionUri = ArchiveUtils.addSlash(p.getDataLocation().toUri());
            URI harPartitionDir = harHelper.getHarUri(originalPartitionUri);
            // NOTE(review): the bodies of the two checks below are not visible; presumably they append user info and port to authority - confirm.
            StringBuilder authority = new StringBuilder();
            if (harPartitionDir.getUserInfo() != null) {
            if (harPartitionDir.getPort() != -1) {
            Path harPath = new Path(harPartitionDir.getScheme(), authority.toString(), // make in Path to ensure no slash at the end
            // Mark the partition as archived at this level and persist the change.
            setArchived(p, harPath, partSpecInfo.values.size());
            db.alterPartition(simpleDesc.getTableName(), p, null);
    } catch (Exception e) {
        throw new HiveException("Unable to change the partition info for HAR", e);
    // will not be deleted. The user will run ARCHIVE again to clear this up
    // NOTE(review): the body of the next check is not visible; presumably it removes the intermediate original directory - confirm.
    if (pathExists(intermediateOriginalDir)) {
    if (recovery) {
        console.printInfo("Recovery after ARCHIVE succeeded");
    return 0;
Also used : Path(org.apache.hadoop.fs.Path) Partition(org.apache.hadoop.hive.ql.metadata.Partition) AlterTableExchangePartition(org.apache.hadoop.hive.ql.plan.AlterTableExchangePartition) Table(org.apache.hadoop.hive.ql.metadata.Table) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) ArrayList(java.util.ArrayList) IOException(java.io.IOException) URI(java.net.URI) AlreadyExistsException(org.apache.hadoop.hive.metastore.api.AlreadyExistsException) InvalidOperationException(org.apache.hadoop.hive.metastore.api.InvalidOperationException) IOException(java.io.IOException) NoSuchObjectException(org.apache.hadoop.hive.metastore.api.NoSuchObjectException) MetaException(org.apache.hadoop.hive.metastore.api.MetaException) URISyntaxException(java.net.URISyntaxException) SemanticException(org.apache.hadoop.hive.ql.parse.SemanticException) StringUtils.stringifyException(org.apache.hadoop.util.StringUtils.stringifyException) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) SQLException(java.sql.SQLException) FileNotFoundException(java.io.FileNotFoundException) HiveAuthzPluginException(org.apache.hadoop.hive.ql.security.authorization.plugin.HiveAuthzPluginException) InvalidTableException(org.apache.hadoop.hive.ql.metadata.InvalidTableException) PartSpecInfo(org.apache.hadoop.hive.ql.exec.ArchiveUtils.PartSpecInfo) FileSystem(org.apache.hadoop.fs.FileSystem) HadoopArchives(org.apache.hadoop.tools.HadoopArchives)

Example 5 with Partition

use of org.apache.hadoop.hive.ql.metadata.Partition in project hive by apache.

the class DDLTask method dropTable.

// Drops a table, view or materialized view after checking that the kind of object matches the
// kind of DROP statement that was issued, with special handling when the drop runs in
// replication scope.
// NOTE(review): this listing appears truncated by extraction - braces are unbalanced, the
// bodies of the getIfExists() checks are missing, and the block comment further down has lost
// its opening and closing delimiters. The comments below describe only what is visible.
private void dropTable(Hive db, Table tbl, DropTableDesc dropTbl) throws HiveException {
    // This is a true DROP TABLE
    if (tbl != null) {
        // The object is a view: only a statement that expects a view may drop it.
        if (tbl.isView()) {
            if (!dropTbl.getExpectView()) {
                // NOTE(review): body not visible; presumably IF EXISTS makes the mismatch a silent no-op - confirm.
                if (dropTbl.getIfExists()) {
                if (dropTbl.getExpectMaterializedView()) {
                    throw new HiveException("Cannot drop a view with DROP MATERIALIZED VIEW");
                } else {
                    throw new HiveException("Cannot drop a view with DROP TABLE");
        // The object is a materialized view: only a statement that expects one may drop it.
        } else if (tbl.isMaterializedView()) {
            if (!dropTbl.getExpectMaterializedView()) {
                if (dropTbl.getIfExists()) {
                if (dropTbl.getExpectView()) {
                    throw new HiveException("Cannot drop a materialized view with DROP VIEW");
                } else {
                    throw new HiveException("Cannot drop a materialized view with DROP TABLE");
        // The object is a base table: statements that expect a view or materialized view are rejected.
        } else {
            if (dropTbl.getExpectView()) {
                if (dropTbl.getIfExists()) {
                throw new HiveException("Cannot drop a base table with DROP VIEW");
            } else if (dropTbl.getExpectMaterializedView()) {
                if (dropTbl.getIfExists()) {
                throw new HiveException("Cannot drop a base table with DROP MATERIALIZED VIEW");
    ReplicationSpec replicationSpec = dropTbl.getReplicationSpec();
    if ((tbl != null) && replicationSpec.isInReplicationScope()) {
       * DROP TABLE FOR REPLICATION behaves differently from DROP TABLE IF EXISTS - it more closely
       * matches a DROP TABLE IF OLDER THAN(x) semantic.
       * Ideally, commands executed under the scope of replication need to be idempotent and resilient
       * to repeats. What can happen, sometimes, is that a drone processing a replication task can
       * have been abandoned for not returning in time, but still execute its task after a while,
       * which should not result in it mucking up data that has been impressed later on. So, for eg.,
       * if we create partition P1, followed by droppping it, followed by creating it yet again,
       * the replication of that drop should not drop the newer partition if it runs after the destination
       * object is already in the newer state.
       * Thus, we check the replicationSpec.allowEventReplacementInto to determine whether or not we can
       * drop the object in question(will return false if object is newer than the event, true if not)
       * In addition, since DROP TABLE FOR REPLICATION can result in a table not being dropped, while DROP
       * TABLE will always drop the table, and the included partitions, DROP TABLE FOR REPLICATION must
       * do one more thing - if it does not drop the table because the table is in a newer state, it must
       * drop the partitions inside it that are older than this event. To wit, DROP TABLE FOR REPL
       * acts like a recursive DROP TABLE IF OLDER.
        if (!replicationSpec.allowEventReplacementInto(tbl)) {
            // any partitions inside that are older.
            if (tbl.isPartitioned()) {
                // Iterate the table's partitions, passing the configured batch-retrieve maximum to the iterable.
                PartitionIterable partitions = new PartitionIterable(db, tbl, null, conf.getIntVar(HiveConf.ConfVars.METASTORE_BATCH_RETRIEVE_MAX));
                // Drop only the partitions accepted by the replication spec's filter.
                for (Partition p : Iterables.filter(partitions, replicationSpec.allowEventReplacementInto())) {
                    db.dropPartition(tbl.getDbName(), tbl.getTableName(), p.getValues(), true);
            // table is newer, leave it be.
    // drop the table
    db.dropTable(dropTbl.getTableName(), dropTbl.getIfPurge());
    if (tbl != null) {
        // Remove from cache if it is a materialized view
        if (tbl.isMaterializedView()) {
        // We have already locked the table in DDLSemanticAnalyzer, don't do it again here
        addIfAbsentByName(new WriteEntity(tbl, WriteEntity.WriteType.DDL_NO_LOCK));
Also used : Partition(org.apache.hadoop.hive.ql.metadata.Partition) AlterTableExchangePartition(org.apache.hadoop.hive.ql.plan.AlterTableExchangePartition) ReplicationSpec(org.apache.hadoop.hive.ql.parse.ReplicationSpec) PartitionIterable(org.apache.hadoop.hive.ql.metadata.PartitionIterable) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) WriteEntity(org.apache.hadoop.hive.ql.hooks.WriteEntity)


Partition (org.apache.hadoop.hive.ql.metadata.Partition)83 Table (org.apache.hadoop.hive.ql.metadata.Table)48 HiveException (org.apache.hadoop.hive.ql.metadata.HiveException)42 ArrayList (java.util.ArrayList)35 AlterTableExchangePartition (org.apache.hadoop.hive.ql.plan.AlterTableExchangePartition)23 Path (org.apache.hadoop.fs.Path)21 WriteEntity (org.apache.hadoop.hive.ql.hooks.WriteEntity)21 HashMap (java.util.HashMap)17 LinkedHashMap (java.util.LinkedHashMap)17 PrunedPartitionList (org.apache.hadoop.hive.ql.parse.PrunedPartitionList)16 SemanticException (org.apache.hadoop.hive.ql.parse.SemanticException)16 ReadEntity (org.apache.hadoop.hive.ql.hooks.ReadEntity)15 IOException ( FieldSchema (org.apache.hadoop.hive.metastore.api.FieldSchema)13 MetaException (org.apache.hadoop.hive.metastore.api.MetaException)12 FileNotFoundException ( FileSystem (org.apache.hadoop.fs.FileSystem)10 InvalidOperationException (org.apache.hadoop.hive.metastore.api.InvalidOperationException)10 TableScanOperator (org.apache.hadoop.hive.ql.exec.TableScanOperator)10 InvalidTableException (org.apache.hadoop.hive.ql.metadata.InvalidTableException)10