
Example 41 with Partition

use of org.apache.hadoop.hive.ql.metadata.Partition in project hive by apache.

the class DDLTask method archive.

private int archive(Hive db, AlterTableSimpleDesc simpleDesc, DriverContext driverContext) throws HiveException {
    Table tbl = db.getTable(simpleDesc.getTableName());
    if (tbl.getTableType() != TableType.MANAGED_TABLE) {
        throw new HiveException("ARCHIVE can only be performed on managed tables");
    }
    Map<String, String> partSpec = simpleDesc.getPartSpec();
    PartSpecInfo partSpecInfo = PartSpecInfo.create(tbl, partSpec);
    List<Partition> partitions = db.getPartitions(tbl, partSpec);
    Path originalDir = null;
    // For a partial partition spec, every matching partition must sit under the
    // standard partition location (custom locations are rejected below); for a
    // full spec the partition's own location is used, to keep backward compatibility.
    if (partitions.isEmpty()) {
        throw new HiveException("No partition matches the specification");
    } else if (partSpecInfo.values.size() != tbl.getPartCols().size()) {
        // for partial specifications we need partitions to follow the scheme
        for (Partition p : partitions) {
            if (partitionInCustomLocation(tbl, p)) {
                String message = String.format("ARCHIVE cannot run for partition " + "groups with custom locations like %s", p.getLocation());
                throw new HiveException(message);
            }
        }
        originalDir = partSpecInfo.createPath(tbl);
    } else {
        Partition p = partitions.get(0);
        // the partition may already be archived if we are recovering from a failed ARCHIVE
        if (ArchiveUtils.isArchived(p)) {
            originalDir = new Path(getOriginalLocation(p));
        } else {
            originalDir = p.getDataLocation();
        }
    }
    Path intermediateArchivedDir = new Path(originalDir.getParent(), originalDir.getName() + INTERMEDIATE_ARCHIVED_DIR_SUFFIX);
    Path intermediateOriginalDir = new Path(originalDir.getParent(), originalDir.getName() + INTERMEDIATE_ORIGINAL_DIR_SUFFIX);
    console.printInfo("intermediate.archived is " + intermediateArchivedDir.toString());
    console.printInfo("intermediate.original is " + intermediateOriginalDir.toString());
    String archiveName = "data.har";
    FileSystem fs = null;
    try {
        fs = originalDir.getFileSystem(conf);
    } catch (IOException e) {
        throw new HiveException(e);
    }
    URI archiveUri = (new Path(originalDir, archiveName)).toUri();
    URI originalUri = ArchiveUtils.addSlash(originalDir.toUri());
    ArchiveUtils.HarPathHelper harHelper = new ArchiveUtils.HarPathHelper(conf, archiveUri, originalUri);
    // None of the matching partitions may already be archived: an archive at a
    // different level is a conflict, and one at the same level is simply reported
    // as already archived.
    for (Partition p : partitions) {
        if (ArchiveUtils.isArchived(p)) {
            if (ArchiveUtils.getArchivingLevel(p) != partSpecInfo.values.size()) {
                String name = ArchiveUtils.getPartialName(p, ArchiveUtils.getArchivingLevel(p));
                String m = String.format("Conflict with existing archive %s", name);
                throw new HiveException(m);
            } else {
                throw new HiveException("Partition(s) already archived");
            }
        }
    }
    boolean recovery = false;
    if (pathExists(intermediateArchivedDir) || pathExists(intermediateOriginalDir)) {
        recovery = true;
        console.printInfo("Starting recovery after failed ARCHIVE");
    }
    // If either intermediate directory already exists, it is complete and safe
    // to use, as the move operation that created it is atomic.
    if (!pathExists(intermediateArchivedDir) && !pathExists(intermediateOriginalDir)) {
        // First create the archive in a tmp dir so that if the job fails, the
        // bad files don't pollute the filesystem
        Path tmpPath = new Path(driverContext.getCtx().getExternalTmpPath(originalDir), "partlevel");
        console.printInfo("Creating " + archiveName + " for " + originalDir.toString());
        console.printInfo("in " + tmpPath);
        console.printInfo("Please wait... (this may take a while)");
        // Create the Hadoop archive
        int ret = 0;
        try {
            int maxJobNameLen = conf.getIntVar(HiveConf.ConfVars.HIVEJOBNAMELENGTH);
            String jobname = String.format("Archiving %s@%s", tbl.getTableName(), partSpecInfo.getName());
            jobname = Utilities.abbreviate(jobname, maxJobNameLen - 6);
            conf.set(MRJobConfig.JOB_NAME, jobname);
            HadoopArchives har = new HadoopArchives(conf);
            List<String> args = new ArrayList<String>();
            args.add("-archiveName");
            args.add(archiveName);
            args.add("-p");
            args.add(originalDir.toString());
            args.add(tmpPath.toString());
            ret = ToolRunner.run(har, args.toArray(new String[0]));
        } catch (Exception e) {
            throw new HiveException(e);
        }
        if (ret != 0) {
            throw new HiveException("Error while creating HAR");
        }
        // Move the archive from the tmp dir to an intermediate dir that sits next to
        // the partition directory, e.g. .../hr=12-intermediate-archived
        try {
            console.printInfo("Moving " + tmpPath + " to " + intermediateArchivedDir);
            if (pathExists(intermediateArchivedDir)) {
                throw new HiveException("The intermediate archive directory already exists.");
            }
            fs.rename(tmpPath, intermediateArchivedDir);
        } catch (IOException e) {
            throw new HiveException("Error while moving tmp directory");
        }
    } else {
        if (pathExists(intermediateArchivedDir)) {
            console.printInfo("Intermediate archive directory " + intermediateArchivedDir + " already exists. Assuming it contains an archived version of the partition");
        }
    }
    // Move the original partition dir aside to the intermediate-original dir,
    // if the move hasn't been made already
    if (!pathExists(intermediateOriginalDir)) {
        console.printInfo("Moving " + originalDir + " to " + intermediateOriginalDir);
        moveDir(fs, originalDir, intermediateOriginalDir);
    } else {
        console.printInfo(intermediateOriginalDir + " already exists. " + "Assuming it contains the original files in the partition");
    }
    // Move the intermediate archived directory to the original parent directory
    if (!pathExists(originalDir)) {
        console.printInfo("Moving " + intermediateArchivedDir + " to " + originalDir);
        moveDir(fs, intermediateArchivedDir, originalDir);
    } else {
        console.printInfo(originalDir + " already exists. " + "Assuming it contains the archived version of the partition");
    }
    // Record this change in the metastore
    try {
        for (Partition p : partitions) {
            URI originalPartitionUri = ArchiveUtils.addSlash(p.getDataLocation().toUri());
            URI harPartitionDir = harHelper.getHarUri(originalPartitionUri);
            StringBuilder authority = new StringBuilder();
            if (harPartitionDir.getUserInfo() != null) {
                authority.append(harPartitionDir.getUserInfo()).append("@");
            }
            authority.append(harPartitionDir.getHost());
            if (harPartitionDir.getPort() != -1) {
                authority.append(":").append(harPartitionDir.getPort());
            }
            // constructing a Path here ensures there is no trailing slash
            Path harPath = new Path(harPartitionDir.getScheme(), authority.toString(), harPartitionDir.getPath());
            setArchived(p, harPath, partSpecInfo.values.size());
            db.alterPartition(simpleDesc.getTableName(), p, null);
        }
    } catch (Exception e) {
        throw new HiveException("Unable to change the partition info for HAR", e);
    }
    // Finally, delete the original (now archived) files. If a failure occurs here,
    // the intermediate-original directory will not be deleted. The user will run
    // ARCHIVE again to clear this up.
    if (pathExists(intermediateOriginalDir)) {
        deleteDir(intermediateOriginalDir);
    }
    if (recovery) {
        console.printInfo("Recovery after ARCHIVE succeeded");
    }
    return 0;
}
Also used : Path(org.apache.hadoop.fs.Path) Partition(org.apache.hadoop.hive.ql.metadata.Partition) AlterTableExchangePartition(org.apache.hadoop.hive.ql.plan.AlterTableExchangePartition) TextMetaDataTable(org.apache.hadoop.hive.ql.metadata.formatting.TextMetaDataTable) Table(org.apache.hadoop.hive.ql.metadata.Table) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) ArrayList(java.util.ArrayList) IOException(java.io.IOException) URI(java.net.URI) SQLUniqueConstraint(org.apache.hadoop.hive.metastore.api.SQLUniqueConstraint) CheckConstraint(org.apache.hadoop.hive.ql.metadata.CheckConstraint) NotNullConstraint(org.apache.hadoop.hive.ql.metadata.NotNullConstraint) SQLCheckConstraint(org.apache.hadoop.hive.metastore.api.SQLCheckConstraint) SQLDefaultConstraint(org.apache.hadoop.hive.metastore.api.SQLDefaultConstraint) DefaultConstraint(org.apache.hadoop.hive.ql.metadata.DefaultConstraint) UniqueConstraint(org.apache.hadoop.hive.ql.metadata.UniqueConstraint) SQLNotNullConstraint(org.apache.hadoop.hive.metastore.api.SQLNotNullConstraint) AlreadyExistsException(org.apache.hadoop.hive.metastore.api.AlreadyExistsException) InvalidOperationException(org.apache.hadoop.hive.metastore.api.InvalidOperationException) IOException(java.io.IOException) ExecutionException(java.util.concurrent.ExecutionException) NoSuchObjectException(org.apache.hadoop.hive.metastore.api.NoSuchObjectException) MetaException(org.apache.hadoop.hive.metastore.api.MetaException) URISyntaxException(java.net.URISyntaxException) SemanticException(org.apache.hadoop.hive.ql.parse.SemanticException) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) InvalidObjectException(org.apache.hadoop.hive.metastore.api.InvalidObjectException) SQLException(java.sql.SQLException) FileNotFoundException(java.io.FileNotFoundException) HiveAuthzPluginException(org.apache.hadoop.hive.ql.security.authorization.plugin.HiveAuthzPluginException) InvalidTableException(org.apache.hadoop.hive.ql.metadata.InvalidTableException) PartSpecInfo(org.apache.hadoop.hive.ql.exec.ArchiveUtils.PartSpecInfo) FileSystem(org.apache.hadoop.fs.FileSystem) HadoopArchives(org.apache.hadoop.tools.HadoopArchives)
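
A minimal, self-contained sketch of the HAR creation step above, assuming a plain Hadoop client: it drives org.apache.hadoop.tools.HadoopArchives through ToolRunner with the same -archiveName/-p argument layout that archive() builds. The paths and class name below are illustrative placeholders, not values taken from Hive.

import java.util.ArrayList;
import java.util.List;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.tools.HadoopArchives;
import org.apache.hadoop.util.ToolRunner;

public class HarCreationSketch {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        HadoopArchives har = new HadoopArchives(conf);

        // Equivalent of: hadoop archive -archiveName data.har -p <parentDir> <destDir>
        List<String> harArgs = new ArrayList<>();
        harArgs.add("-archiveName");
        harArgs.add("data.har");
        harArgs.add("-p");
        // hypothetical partition directory to archive
        harArgs.add("/user/hive/warehouse/t/ds=2018-01-01");
        // hypothetical staging directory for the new archive
        harArgs.add("/tmp/archive-staging/partlevel");

        int ret = ToolRunner.run(har, harArgs.toArray(new String[0]));
        if (ret != 0) {
            throw new RuntimeException("HAR creation failed with exit code " + ret);
        }
    }
}

As in DDLTask, the archive is first written to a staging directory and only then renamed into place, so a failed job does not pollute the partition location.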

Example 42 with Partition

use of org.apache.hadoop.hive.ql.metadata.Partition in project hive by apache.

the class DDLTask method describeTable.

/**
 * Write the description of a table to a file.
 *
 * @param db
 *          The database in question.
 * @param descTbl
 *          The descriptor of the DESCRIBE request (table name, optional partition spec and column path).
 * @return Returns 0 when execution succeeds and above 0 if it fails.
 * @throws HiveException
 *           Throws this exception if an unexpected error occurs.
 * @throws MetaException
 */
private int describeTable(Hive db, DescTableDesc descTbl) throws HiveException, MetaException {
    String colPath = descTbl.getColumnPath();
    String tableName = descTbl.getTableName();
    // describe the table - populate the output stream
    Table tbl = db.getTable(tableName, false);
    if (tbl == null) {
        throw new HiveException(ErrorMsg.INVALID_TABLE, tableName);
    }
    Partition part = null;
    if (descTbl.getPartSpec() != null) {
        part = db.getPartition(tbl, descTbl.getPartSpec(), false);
        if (part == null) {
            throw new HiveException(ErrorMsg.INVALID_PARTITION, StringUtils.join(descTbl.getPartSpec().keySet(), ','), tableName);
        }
        tbl = part.getTable();
    }
    DataOutputStream outStream = getOutputStream(descTbl.getResFile());
    try {
        LOG.debug("DDLTask: got data for {}", tableName);
        List<FieldSchema> cols = null;
        List<ColumnStatisticsObj> colStats = null;
        Deserializer deserializer = tbl.getDeserializer(true);
        if (deserializer instanceof AbstractSerDe) {
            String errorMsgs = ((AbstractSerDe) deserializer).getConfigurationErrors();
            if (errorMsgs != null && !errorMsgs.isEmpty()) {
                throw new SQLException(errorMsgs);
            }
        }
        if (colPath.equals(tableName)) {
            cols = (part == null || tbl.getTableType() == TableType.VIRTUAL_VIEW) ? tbl.getCols() : part.getCols();
            if (!descTbl.isFormatted()) {
                cols.addAll(tbl.getPartCols());
            }
            if (tbl.isPartitioned() && part == null) {
                // No partition specified for a partitioned table; let's fetch stats for all of them.
                Map<String, String> tblProps = tbl.getParameters() == null ? new HashMap<String, String>() : tbl.getParameters();
                Map<String, Long> valueMap = new HashMap<>();
                Map<String, Boolean> stateMap = new HashMap<>();
                for (String stat : StatsSetupConst.supportedStats) {
                    valueMap.put(stat, 0L);
                    stateMap.put(stat, true);
                }
                PartitionIterable parts = new PartitionIterable(db, tbl, null, conf.getIntVar(HiveConf.ConfVars.METASTORE_BATCH_RETRIEVE_MAX));
                int numParts = 0;
                for (Partition partition : parts) {
                    Map<String, String> props = partition.getParameters();
                    Boolean state = StatsSetupConst.areBasicStatsUptoDate(props);
                    for (String stat : StatsSetupConst.supportedStats) {
                        stateMap.put(stat, stateMap.get(stat) && state);
                        if (props != null && props.get(stat) != null) {
                            valueMap.put(stat, valueMap.get(stat) + Long.parseLong(props.get(stat)));
                        }
                    }
                    numParts++;
                }
                for (String stat : StatsSetupConst.supportedStats) {
                    StatsSetupConst.setBasicStatsState(tblProps, Boolean.toString(stateMap.get(stat)));
                    tblProps.put(stat, valueMap.get(stat).toString());
                }
                tblProps.put(StatsSetupConst.NUM_PARTITIONS, Integer.toString(numParts));
                tbl.setParameters(tblProps);
            }
        } else {
            if (descTbl.isFormatted()) {
                // when a column name is specified in the DESCRIBE TABLE DDL, colPath
                // will be table_name.column_name
                String colName = colPath.split("\\.")[1];
                String[] dbTab = Utilities.getDbTableName(tableName);
                List<String> colNames = new ArrayList<String>();
                colNames.add(colName.toLowerCase());
                if (null == part) {
                    if (tbl.isPartitioned()) {
                        Map<String, String> tblProps = tbl.getParameters() == null ? new HashMap<String, String>() : tbl.getParameters();
                        if (tbl.isPartitionKey(colNames.get(0))) {
                            FieldSchema partCol = tbl.getPartColByName(colNames.get(0));
                            cols = Collections.singletonList(partCol);
                            PartitionIterable parts = new PartitionIterable(db, tbl, null, conf.getIntVar(HiveConf.ConfVars.METASTORE_BATCH_RETRIEVE_MAX));
                            ColumnInfo ci = new ColumnInfo(partCol.getName(), TypeInfoUtils.getTypeInfoFromTypeString(partCol.getType()), null, false);
                            ColStatistics cs = StatsUtils.getColStatsForPartCol(ci, parts, conf);
                            ColumnStatisticsData data = new ColumnStatisticsData();
                            ColStatistics.Range r = cs.getRange();
                            StatObjectConverter.fillColumnStatisticsData(partCol.getType(), data, r == null ? null : r.minValue, r == null ? null : r.maxValue, r == null ? null : r.minValue, r == null ? null : r.maxValue, r == null ? null : r.minValue.toString(), r == null ? null : r.maxValue.toString(), cs.getNumNulls(), cs.getCountDistint(), null, cs.getAvgColLen(), cs.getAvgColLen(), cs.getNumTrues(), cs.getNumFalses());
                            ColumnStatisticsObj cso = new ColumnStatisticsObj(partCol.getName(), partCol.getType(), data);
                            colStats = Collections.singletonList(cso);
                            StatsSetupConst.setColumnStatsState(tblProps, colNames);
                        } else {
                            cols = Hive.getFieldsFromDeserializer(colPath, deserializer);
                            List<String> parts = db.getPartitionNames(dbTab[0].toLowerCase(), dbTab[1].toLowerCase(), (short) -1);
                            AggrStats aggrStats = db.getAggrColStatsFor(dbTab[0].toLowerCase(), dbTab[1].toLowerCase(), colNames, parts);
                            colStats = aggrStats.getColStats();
                            if (parts.size() == aggrStats.getPartsFound()) {
                                StatsSetupConst.setColumnStatsState(tblProps, colNames);
                            } else {
                                StatsSetupConst.removeColumnStatsState(tblProps, colNames);
                            }
                        }
                        tbl.setParameters(tblProps);
                    } else {
                        cols = Hive.getFieldsFromDeserializer(colPath, deserializer);
                        colStats = db.getTableColumnStatistics(dbTab[0].toLowerCase(), dbTab[1].toLowerCase(), colNames);
                    }
                } else {
                    List<String> partitions = new ArrayList<String>();
                    partitions.add(part.getName());
                    cols = Hive.getFieldsFromDeserializer(colPath, deserializer);
                    colStats = db.getPartitionColumnStatistics(dbTab[0].toLowerCase(), dbTab[1].toLowerCase(), partitions, colNames).get(part.getName());
                }
            } else {
                cols = Hive.getFieldsFromDeserializer(colPath, deserializer);
            }
        }
        PrimaryKeyInfo pkInfo = null;
        ForeignKeyInfo fkInfo = null;
        UniqueConstraint ukInfo = null;
        NotNullConstraint nnInfo = null;
        DefaultConstraint dInfo = null;
        CheckConstraint cInfo = null;
        if (descTbl.isExt() || descTbl.isFormatted()) {
            pkInfo = db.getPrimaryKeys(tbl.getDbName(), tbl.getTableName());
            fkInfo = db.getForeignKeys(tbl.getDbName(), tbl.getTableName());
            ukInfo = db.getUniqueConstraints(tbl.getDbName(), tbl.getTableName());
            nnInfo = db.getNotNullConstraints(tbl.getDbName(), tbl.getTableName());
            dInfo = db.getDefaultConstraints(tbl.getDbName(), tbl.getTableName());
            cInfo = db.getCheckConstraints(tbl.getDbName(), tbl.getTableName());
        }
        fixDecimalColumnTypeName(cols);
        // In case the query is served by HiveServer2, don't pad it with spaces,
        // as HiveServer2 output is consumed by JDBC/ODBC clients.
        boolean isOutputPadded = !SessionState.get().isHiveServerQuery();
        formatter.describeTable(outStream, colPath, tableName, tbl, part, cols, descTbl.isFormatted(), descTbl.isExt(), isOutputPadded, colStats, pkInfo, fkInfo, ukInfo, nnInfo, dInfo, cInfo);
        LOG.debug("DDLTask: written data for {}", tableName);
    } catch (SQLException e) {
        throw new HiveException(e, ErrorMsg.GENERIC_ERROR, tableName);
    } finally {
        IOUtils.closeStream(outStream);
    }
    return 0;
}
Also used : HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) SQLException(java.sql.SQLException) LinkedHashMap(java.util.LinkedHashMap) HashMap(java.util.HashMap) AggrStats(org.apache.hadoop.hive.metastore.api.AggrStats) DataOutputStream(java.io.DataOutputStream) FSDataOutputStream(org.apache.hadoop.fs.FSDataOutputStream) FieldSchema(org.apache.hadoop.hive.metastore.api.FieldSchema) ArrayList(java.util.ArrayList) SQLUniqueConstraint(org.apache.hadoop.hive.metastore.api.SQLUniqueConstraint) UniqueConstraint(org.apache.hadoop.hive.ql.metadata.UniqueConstraint) AbstractSerDe(org.apache.hadoop.hive.serde2.AbstractSerDe) SQLDefaultConstraint(org.apache.hadoop.hive.metastore.api.SQLDefaultConstraint) DefaultConstraint(org.apache.hadoop.hive.ql.metadata.DefaultConstraint) ColumnStatisticsObj(org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj) PrimaryKeyInfo(org.apache.hadoop.hive.ql.metadata.PrimaryKeyInfo) ForeignKeyInfo(org.apache.hadoop.hive.ql.metadata.ForeignKeyInfo) ColStatistics(org.apache.hadoop.hive.ql.plan.ColStatistics) NotNullConstraint(org.apache.hadoop.hive.ql.metadata.NotNullConstraint) SQLNotNullConstraint(org.apache.hadoop.hive.metastore.api.SQLNotNullConstraint) CheckConstraint(org.apache.hadoop.hive.ql.metadata.CheckConstraint) SQLCheckConstraint(org.apache.hadoop.hive.metastore.api.SQLCheckConstraint) Partition(org.apache.hadoop.hive.ql.metadata.Partition) AlterTableExchangePartition(org.apache.hadoop.hive.ql.plan.AlterTableExchangePartition) TextMetaDataTable(org.apache.hadoop.hive.ql.metadata.formatting.TextMetaDataTable) Table(org.apache.hadoop.hive.ql.metadata.Table) SQLUniqueConstraint(org.apache.hadoop.hive.metastore.api.SQLUniqueConstraint) CheckConstraint(org.apache.hadoop.hive.ql.metadata.CheckConstraint) NotNullConstraint(org.apache.hadoop.hive.ql.metadata.NotNullConstraint) SQLCheckConstraint(org.apache.hadoop.hive.metastore.api.SQLCheckConstraint) SQLDefaultConstraint(org.apache.hadoop.hive.metastore.api.SQLDefaultConstraint) DefaultConstraint(org.apache.hadoop.hive.ql.metadata.DefaultConstraint) UniqueConstraint(org.apache.hadoop.hive.ql.metadata.UniqueConstraint) SQLNotNullConstraint(org.apache.hadoop.hive.metastore.api.SQLNotNullConstraint) PartitionIterable(org.apache.hadoop.hive.ql.metadata.PartitionIterable) Deserializer(org.apache.hadoop.hive.serde2.Deserializer) ColumnStatisticsData(org.apache.hadoop.hive.metastore.api.ColumnStatisticsData)
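
The partition loop in the middle of describeTable() folds per-partition basic stats into table-level properties. Below is a self-contained sketch of that roll-up, using plain maps in place of Partition.getParameters() and a hard-coded stat list standing in for StatsSetupConst.supportedStats; the sample values are made up.

import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

public class StatsRollupSketch {
    public static void main(String[] args) {
        // stand-in for StatsSetupConst.supportedStats
        List<String> supportedStats = Arrays.asList("numRows", "totalSize");

        // pretend these came from Partition.getParameters() for two partitions
        List<Map<String, String>> partitionParams = Arrays.asList(
                Map.of("numRows", "100", "totalSize", "2048"),
                Map.of("numRows", "50",  "totalSize", "1024"));

        Map<String, Long> valueMap = new HashMap<>();
        Map<String, Boolean> stateMap = new HashMap<>();
        for (String stat : supportedStats) {
            valueMap.put(stat, 0L);
            stateMap.put(stat, true);
        }

        int numParts = 0;
        for (Map<String, String> props : partitionParams) {
            // stand-in for StatsSetupConst.areBasicStatsUptoDate(props)
            boolean basicStatsUpToDate = true;
            for (String stat : supportedStats) {
                // the combined flag is the AND over all partitions; values are summed
                stateMap.put(stat, stateMap.get(stat) && basicStatsUpToDate);
                if (props.get(stat) != null) {
                    valueMap.put(stat, valueMap.get(stat) + Long.parseLong(props.get(stat)));
                }
            }
            numParts++;
        }

        System.out.println("partitions=" + numParts + " totals=" + valueMap + " upToDate=" + stateMap);
    }
}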

Example 43 with Partition

use of org.apache.hadoop.hive.ql.metadata.Partition in project hive by apache.

the class DDLTask method compact.

private int compact(Hive db, AlterTableSimpleDesc desc) throws HiveException {
    Table tbl = db.getTable(desc.getTableName());
    if (!AcidUtils.isTransactionalTable(tbl)) {
        throw new HiveException(ErrorMsg.NONACID_COMPACTION_NOT_SUPPORTED, tbl.getDbName(), tbl.getTableName());
    }
    String partName = null;
    if (desc.getPartSpec() == null) {
        // Compaction can only be done on the whole table if the table is non-partitioned.
        if (tbl.isPartitioned()) {
            throw new HiveException(ErrorMsg.NO_COMPACTION_PARTITION);
        }
    } else {
        Map<String, String> partSpec = desc.getPartSpec();
        List<Partition> partitions = db.getPartitions(tbl, partSpec);
        if (partitions.size() > 1) {
            throw new HiveException(ErrorMsg.TOO_MANY_COMPACTION_PARTITIONS);
        } else if (partitions.size() == 0) {
            throw new HiveException(ErrorMsg.INVALID_PARTITION_SPEC);
        }
        partName = partitions.get(0).getName();
    }
    CompactionResponse resp = db.compact2(tbl.getDbName(), tbl.getTableName(), partName, desc.getCompactionType(), desc.getProps());
    if (resp.isAccepted()) {
        console.printInfo("Compaction enqueued with id " + resp.getId());
    } else {
        console.printInfo("Compaction already enqueued with id " + resp.getId() + "; State is " + resp.getState());
    }
    if (desc.isBlocking() && resp.isAccepted()) {
        StringBuilder progressDots = new StringBuilder();
        long waitTimeMs = 1000;
        wait: while (true) {
            // double the wait time, capped at 5 minutes
            waitTimeMs = waitTimeMs * 2;
            waitTimeMs = waitTimeMs < 5 * 60 * 1000 ? waitTimeMs : 5 * 60 * 1000;
            try {
                Thread.sleep(waitTimeMs);
            } catch (InterruptedException ex) {
                console.printInfo("Interrupted while waiting for compaction with id=" + resp.getId());
                break;
            }
            // this could be expensive when there are a lot of compactions....
            // todo: update to search by ID once HIVE-13353 is done
            ShowCompactResponse allCompactions = db.showCompactions();
            for (ShowCompactResponseElement compaction : allCompactions.getCompacts()) {
                if (resp.getId() != compaction.getId()) {
                    continue;
                }
                switch(compaction.getState()) {
                    case TxnStore.WORKING_RESPONSE:
                    case TxnStore.INITIATED_RESPONSE:
                        // still working
                        console.printInfo(progressDots.toString());
                        progressDots.append(".");
                        continue wait;
                    default:
                        // done
                        console.printInfo("Compaction with id " + resp.getId() + " finished with status: " + compaction.getState());
                        break wait;
                }
            }
        }
    }
    return 0;
}
Also used : Partition(org.apache.hadoop.hive.ql.metadata.Partition) AlterTableExchangePartition(org.apache.hadoop.hive.ql.plan.AlterTableExchangePartition) TextMetaDataTable(org.apache.hadoop.hive.ql.metadata.formatting.TextMetaDataTable) Table(org.apache.hadoop.hive.ql.metadata.Table) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) ShowCompactResponse(org.apache.hadoop.hive.metastore.api.ShowCompactResponse) CompactionResponse(org.apache.hadoop.hive.metastore.api.CompactionResponse) ShowCompactResponseElement(org.apache.hadoop.hive.metastore.api.ShowCompactResponseElement)
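
The blocking branch of compact() polls the metastore with a wait that doubles up to a five-minute cap. Here is a standalone sketch of that backoff loop, where isStillRunning() is a hypothetical stand-in for matching the compaction id against db.showCompactions().

public class CompactionWaitSketch {
    private static int remainingChecks = 3;

    public static void main(String[] args) throws InterruptedException {
        long waitTimeMs = 1000;
        final long maxWaitMs = 5 * 60 * 1000;
        while (true) {
            // double the wait time, capped at five minutes, then sleep before polling
            waitTimeMs = Math.min(waitTimeMs * 2, maxWaitMs);
            Thread.sleep(waitTimeMs);
            if (!isStillRunning()) {
                System.out.println("Compaction finished");
                break;
            }
            System.out.println("Still working; next check in " + waitTimeMs + " ms");
        }
    }

    // Hypothetical stand-in: DDLTask instead scans ShowCompactResponse for the
    // enqueued id and checks whether its state is still INITIATED or WORKING.
    private static boolean isStillRunning() {
        return --remainingChecks > 0;
    }
}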

Example 44 with Partition

use of org.apache.hadoop.hive.ql.metadata.Partition in project hive by apache.

the class DDLTask method dropTable.

private void dropTable(Hive db, Table tbl, DropTableDesc dropTbl) throws HiveException {
    // This is a true DROP TABLE
    if (tbl != null && dropTbl.getValidationRequired()) {
        if (tbl.isView()) {
            if (!dropTbl.getExpectView()) {
                if (dropTbl.getIfExists()) {
                    return;
                }
                if (dropTbl.getExpectMaterializedView()) {
                    throw new HiveException("Cannot drop a view with DROP MATERIALIZED VIEW");
                } else {
                    throw new HiveException("Cannot drop a view with DROP TABLE");
                }
            }
        } else if (tbl.isMaterializedView()) {
            if (!dropTbl.getExpectMaterializedView()) {
                if (dropTbl.getIfExists()) {
                    return;
                }
                if (dropTbl.getExpectView()) {
                    throw new HiveException("Cannot drop a materialized view with DROP VIEW");
                } else {
                    throw new HiveException("Cannot drop a materialized view with DROP TABLE");
                }
            }
        } else {
            if (dropTbl.getExpectView()) {
                if (dropTbl.getIfExists()) {
                    return;
                }
                throw new HiveException("Cannot drop a base table with DROP VIEW");
            } else if (dropTbl.getExpectMaterializedView()) {
                if (dropTbl.getIfExists()) {
                    return;
                }
                throw new HiveException("Cannot drop a base table with DROP MATERIALIZED VIEW");
            }
        }
    }
    ReplicationSpec replicationSpec = dropTbl.getReplicationSpec();
    if ((tbl != null) && replicationSpec.isInReplicationScope()) {
        /**
         * DROP TABLE FOR REPLICATION behaves differently from DROP TABLE IF EXISTS - it more closely
         * matches a DROP TABLE IF OLDER THAN(x) semantic.
         *
         * Ideally, commands executed under the scope of replication need to be idempotent and resilient
         * to repeats. What can happen, sometimes, is that a drone processing a replication task can
         * have been abandoned for not returning in time, but still execute its task after a while,
         * which should not result in it mucking up data that has been impressed later on. So, e.g.,
         * if we create partition P1, followed by dropping it, followed by creating it yet again,
         * the replication of that drop should not drop the newer partition if it runs after the destination
         * object is already in the newer state.
         *
         * Thus, we check the replicationSpec.allowEventReplacementInto to determine whether or not we can
         * drop the object in question (will return false if the object is newer than the event, true if not)
         *
         * In addition, since DROP TABLE FOR REPLICATION can result in a table not being dropped, while DROP
         * TABLE will always drop the table, and the included partitions, DROP TABLE FOR REPLICATION must
         * do one more thing - if it does not drop the table because the table is in a newer state, it must
         * drop the partitions inside it that are older than this event. To wit, DROP TABLE FOR REPL
         * acts like a recursive DROP TABLE IF OLDER.
         */
        if (!replicationSpec.allowEventReplacementInto(tbl.getParameters())) {
            // The table itself is newer than this event, so it is not dropped; instead,
            // drop any partitions inside it that are older than the event.
            if (tbl.isPartitioned()) {
                PartitionIterable partitions = new PartitionIterable(db, tbl, null, conf.getIntVar(HiveConf.ConfVars.METASTORE_BATCH_RETRIEVE_MAX));
                for (Partition p : Iterables.filter(partitions, replicationSpec.allowEventReplacementInto())) {
                    db.dropPartition(tbl.getDbName(), tbl.getTableName(), p.getValues(), true);
                }
            }
            LOG.debug("DDLTask: Drop Table is skipped as table {} is newer than update", dropTbl.getTableName());
            // table is newer, leave it be.
            return;
        }
    }
    // drop the table
    db.dropTable(dropTbl.getTableName(), dropTbl.getIfPurge());
    if (tbl != null) {
        // Remove from cache if it is a materialized view
        if (tbl.isMaterializedView()) {
            HiveMaterializedViewsRegistry.get().dropMaterializedView(tbl);
        }
        // We have already locked the table in DDLSemanticAnalyzer, don't do it again here
        addIfAbsentByName(new WriteEntity(tbl, WriteEntity.WriteType.DDL_NO_LOCK));
    }
}
Also used : Partition(org.apache.hadoop.hive.ql.metadata.Partition) AlterTableExchangePartition(org.apache.hadoop.hive.ql.plan.AlterTableExchangePartition) ReplicationSpec(org.apache.hadoop.hive.ql.parse.ReplicationSpec) PartitionIterable(org.apache.hadoop.hive.ql.metadata.PartitionIterable) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) WriteEntity(org.apache.hadoop.hive.ql.hooks.WriteEntity)
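
The replication branch above drops only those partitions that the replication spec still allows replacing, using Guava's Iterables.filter. Below is a minimal sketch of that filtering pattern, with strings standing in for Partition objects and a simple date comparison standing in for ReplicationSpec.allowEventReplacementInto(); the values are illustrative.

import java.util.Arrays;
import java.util.List;

import com.google.common.base.Predicate;
import com.google.common.collect.Iterables;

public class PartitionFilterSketch {
    public static void main(String[] args) {
        List<String> partitions = Arrays.asList("ds=2018-01-01", "ds=2018-01-02", "ds=2018-01-03");

        // Pretend the replication event only allows replacing partitions older than 2018-01-03.
        Predicate<String> allowReplacement = p -> p.compareTo("ds=2018-01-03") < 0;

        // Only partitions passing the predicate are dropped; newer ones are left alone.
        for (String p : Iterables.filter(partitions, allowReplacement)) {
            System.out.println("Would drop partition " + p);
        }
    }
}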

Example 45 with Partition

use of org.apache.hadoop.hive.ql.metadata.Partition in project hive by apache.

the class DDLTask method alterTable.

/**
 * Alter a given table.
 *
 * @param db
 *          The database in question.
 * @param alterTbl
 *          This is the table we're altering.
 * @return Returns 0 when execution succeeds and above 0 if it fails.
 * @throws HiveException
 *           Throws this exception if an unexpected error occurs.
 */
private int alterTable(Hive db, AlterTableDesc alterTbl) throws HiveException {
    if (alterTbl.getOp() == AlterTableDesc.AlterTableTypes.RENAME) {
        String[] names = Utilities.getDbTableName(alterTbl.getOldName());
        if (Utils.isBootstrapDumpInProgress(db, names[0])) {
            LOG.error("DDLTask: Rename Table not allowed as bootstrap dump in progress");
            throw new HiveException("Rename Table: Not allowed as bootstrap dump in progress");
        }
    }
    // alter the table
    Table tbl = db.getTable(alterTbl.getOldName());
    List<Partition> allPartitions = null;
    if (alterTbl.getPartSpec() != null) {
        Map<String, String> partSpec = alterTbl.getPartSpec();
        if (DDLSemanticAnalyzer.isFullSpec(tbl, partSpec)) {
            allPartitions = new ArrayList<Partition>();
            Partition part = db.getPartition(tbl, partSpec, false);
            if (part == null) {
                // User provided a fully specified partition spec but it doesn't exist, fail.
                throw new HiveException(ErrorMsg.INVALID_PARTITION, StringUtils.join(alterTbl.getPartSpec().keySet(), ',') + " for table " + alterTbl.getOldName());
            }
            allPartitions.add(part);
        } else {
            // DDLSemanticAnalyzer has already checked if partial partition specs are allowed,
            // thus we should not need to check it here.
            allPartitions = db.getPartitions(tbl, alterTbl.getPartSpec());
        }
    }
    // Don't change the table object returned by the metastore, as we'll mess with its caches.
    Table oldTbl = tbl;
    tbl = oldTbl.copy();
    // Either every matched partition is altered, or just the table itself; the two
    // cases are spelled out explicitly below.
    if (allPartitions != null) {
        // Alter all partitions
        for (Partition part : allPartitions) {
            addChildTasks(alterTableOrSinglePartition(alterTbl, tbl, part));
        }
    } else {
        // Just alter the table
        addChildTasks(alterTableOrSinglePartition(alterTbl, tbl, null));
    }
    if (allPartitions == null) {
        updateModifiedParameters(tbl.getTTable().getParameters(), conf);
        tbl.checkValidity(conf);
    } else {
        for (Partition tmpPart : allPartitions) {
            updateModifiedParameters(tmpPart.getParameters(), conf);
        }
    }
    try {
        if (allPartitions == null) {
            db.alterTable(alterTbl.getOldName(), tbl, alterTbl.getIsCascade(), alterTbl.getEnvironmentContext());
        } else {
            db.alterPartitions(Warehouse.getQualifiedName(tbl.getTTable()), allPartitions, alterTbl.getEnvironmentContext());
        }
        // Add constraints if necessary
        addConstraints(db, alterTbl);
    } catch (InvalidOperationException e) {
        LOG.error("alter table: ", e);
        throw new HiveException(e, ErrorMsg.GENERIC_ERROR);
    }
    // Don't acquire locks for any of these, we have already asked for them in DDLSemanticAnalyzer.
    if (allPartitions != null) {
        for (Partition tmpPart : allPartitions) {
            work.getInputs().add(new ReadEntity(tmpPart));
            addIfAbsentByName(new WriteEntity(tmpPart, WriteEntity.WriteType.DDL_NO_LOCK));
        }
    } else {
        work.getInputs().add(new ReadEntity(oldTbl));
        addIfAbsentByName(new WriteEntity(tbl, WriteEntity.WriteType.DDL_NO_LOCK));
    }
    return 0;
}
Also used : ReadEntity(org.apache.hadoop.hive.ql.hooks.ReadEntity) Partition(org.apache.hadoop.hive.ql.metadata.Partition) AlterTableExchangePartition(org.apache.hadoop.hive.ql.plan.AlterTableExchangePartition) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) TextMetaDataTable(org.apache.hadoop.hive.ql.metadata.formatting.TextMetaDataTable) Table(org.apache.hadoop.hive.ql.metadata.Table) InvalidOperationException(org.apache.hadoop.hive.metastore.api.InvalidOperationException) WriteEntity(org.apache.hadoop.hive.ql.hooks.WriteEntity)
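
alterTable() treats a partition spec that names every partition column differently from a partial one: the former resolves to exactly one partition, the latter to a listing of all matches. A small sketch of that distinction follows, with a local isFullSpec() standing in for DDLSemanticAnalyzer.isFullSpec(tbl, partSpec); the column names and specs are made up.

import java.util.Arrays;
import java.util.List;
import java.util.Map;

public class PartSpecSketch {
    // A spec is "full" when it names every partition column exactly once.
    static boolean isFullSpec(List<String> partCols, Map<String, String> partSpec) {
        return partSpec.size() == partCols.size() && partSpec.keySet().containsAll(partCols);
    }

    public static void main(String[] args) {
        List<String> partCols = Arrays.asList("ds", "hr");

        Map<String, String> full = Map.of("ds", "2018-01-01", "hr", "12");
        Map<String, String> partial = Map.of("ds", "2018-01-01");

        System.out.println(isFullSpec(partCols, full));    // true  -> single getPartition lookup
        System.out.println(isFullSpec(partCols, partial)); // false -> getPartitions over all matches
    }
}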

Aggregations

Partition (org.apache.hadoop.hive.ql.metadata.Partition)102 Table (org.apache.hadoop.hive.ql.metadata.Table)56 HiveException (org.apache.hadoop.hive.ql.metadata.HiveException)48 ArrayList (java.util.ArrayList)43 Path (org.apache.hadoop.fs.Path)25 AlterTableExchangePartition (org.apache.hadoop.hive.ql.plan.AlterTableExchangePartition)25 WriteEntity (org.apache.hadoop.hive.ql.hooks.WriteEntity)24 IOException (java.io.IOException)18 HashMap (java.util.HashMap)18 LinkedHashMap (java.util.LinkedHashMap)18 ReadEntity (org.apache.hadoop.hive.ql.hooks.ReadEntity)18 SemanticException (org.apache.hadoop.hive.ql.parse.SemanticException)18 PrunedPartitionList (org.apache.hadoop.hive.ql.parse.PrunedPartitionList)17 MetaException (org.apache.hadoop.hive.metastore.api.MetaException)14 FileNotFoundException (java.io.FileNotFoundException)12 FieldSchema (org.apache.hadoop.hive.metastore.api.FieldSchema)12 TableScanOperator (org.apache.hadoop.hive.ql.exec.TableScanOperator)12 InvalidOperationException (org.apache.hadoop.hive.metastore.api.InvalidOperationException)11 SQLCheckConstraint (org.apache.hadoop.hive.metastore.api.SQLCheckConstraint)11 SQLDefaultConstraint (org.apache.hadoop.hive.metastore.api.SQLDefaultConstraint)11