Example 1 with ReplicationSpec

Use of org.apache.hadoop.hive.ql.parse.ReplicationSpec in project hive by apache.

From class DDLTask, method dropDatabase.

/**
 * Drop a database.
 * @param db the Hive metadata handle used to look up and drop the database
 * @param dropDb descriptor carrying the database name, IF EXISTS, CASCADE, and the replication spec
 * @return always 0; a replication-scoped drop that is skipped also returns 0
 * @throws HiveException if the database does not exist and IF EXISTS was not specified
 */
private int dropDatabase(Hive db, DropDatabaseDesc dropDb) throws HiveException {
    try {
        String dbName = dropDb.getDatabaseName();
        ReplicationSpec replicationSpec = dropDb.getReplicationSpec();
        if (replicationSpec.isInReplicationScope()) {
            Database database = db.getDatabase(dbName);
            if (database == null || !replicationSpec.allowEventReplacementInto(database.getParameters())) {
                return 0;
            }
        }
        db.dropDatabase(dbName, true, dropDb.getIfExists(), dropDb.isCasdade());
        // Unregister the functions as well
        if (dropDb.isCasdade()) {
            FunctionRegistry.unregisterPermanentFunctions(dbName);
        }
    } catch (NoSuchObjectException ex) {
        throw new HiveException(ex, ErrorMsg.DATABASE_NOT_EXISTS, dropDb.getDatabaseName());
    }
    return 0;
}
Also used: ReplicationSpec (org.apache.hadoop.hive.ql.parse.ReplicationSpec), HiveException (org.apache.hadoop.hive.ql.metadata.HiveException), Database (org.apache.hadoop.hive.metastore.api.Database), NoSuchObjectException (org.apache.hadoop.hive.metastore.api.NoSuchObjectException)
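
The early return above hinges on replicationSpec.allowEventReplacementInto(database.getParameters()): a replayed drop must only apply if the destination object is older than the event. Below is a minimal sketch of that ordering check, assuming the spec carries a numeric replication event id that is compared against a "repl.last.id" parameter on the object; the helper and key handling are illustrative, not the exact Hive implementation.

import java.util.Map;

public class ReplacementCheckSketch {

    // Hypothetical stand-in for ReplicationSpec.allowEventReplacementInto(params):
    // the incoming event may replace the object's state only if it is newer than
    // the replication state already recorded on the object.
    static boolean allowEventReplacementInto(Map<String, String> objectParams, long eventId) {
        String lastReplId = objectParams.get("repl.last.id"); // assumed state key
        if (lastReplId == null) {
            return true; // object has no replication state; safe to replace
        }
        // Event ids increase monotonically; an older or equal event must not
        // overwrite a newer destination state (idempotent, repeat-safe replay).
        return eventId > Long.parseLong(lastReplId);
    }

    public static void main(String[] args) {
        Map<String, String> params = Map.of("repl.last.id", "100");
        System.out.println(allowEventReplacementInto(params, 90));  // false: stale event, skip the drop
        System.out.println(allowEventReplacementInto(params, 120)); // true: newer event, apply the drop
    }
}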

Example 2 with ReplicationSpec

Use of org.apache.hadoop.hive.ql.parse.ReplicationSpec in project hive by apache.

From class DDLTask, method dropTable.

private void dropTable(Hive db, Table tbl, DropTableDesc dropTbl) throws HiveException {
    // This is a true DROP TABLE
    if (tbl != null && dropTbl.getValidationRequired()) {
        if (tbl.isView()) {
            if (!dropTbl.getExpectView()) {
                if (dropTbl.getIfExists()) {
                    return;
                }
                if (dropTbl.getExpectMaterializedView()) {
                    throw new HiveException("Cannot drop a view with DROP MATERIALIZED VIEW");
                } else {
                    throw new HiveException("Cannot drop a view with DROP TABLE");
                }
            }
        } else if (tbl.isMaterializedView()) {
            if (!dropTbl.getExpectMaterializedView()) {
                if (dropTbl.getIfExists()) {
                    return;
                }
                if (dropTbl.getExpectView()) {
                    throw new HiveException("Cannot drop a materialized view with DROP VIEW");
                } else {
                    throw new HiveException("Cannot drop a materialized view with DROP TABLE");
                }
            }
        } else {
            if (dropTbl.getExpectView()) {
                if (dropTbl.getIfExists()) {
                    return;
                }
                throw new HiveException("Cannot drop a base table with DROP VIEW");
            } else if (dropTbl.getExpectMaterializedView()) {
                if (dropTbl.getIfExists()) {
                    return;
                }
                throw new HiveException("Cannot drop a base table with DROP MATERIALIZED VIEW");
            }
        }
    }
    ReplicationSpec replicationSpec = dropTbl.getReplicationSpec();
    if ((tbl != null) && replicationSpec.isInReplicationScope()) {
        /**
         * DROP TABLE FOR REPLICATION behaves differently from DROP TABLE IF EXISTS: it more
         * closely matches a DROP TABLE IF OLDER THAN (x) semantic.
         *
         * Commands executed under the scope of replication need to be idempotent and resilient
         * to repeats. A worker processing a replication task may be presumed abandoned for not
         * returning in time, yet still execute its task later, and that late execution must not
         * clobber data written after it. For example, if we create partition P1, then drop it,
         * then create it yet again, replaying that drop must not remove the newer partition if
         * the replay runs after the destination object is already in the newer state.
         *
         * Thus, we check replicationSpec.allowEventReplacementInto to determine whether we can
         * drop the object in question (it returns false if the object is newer than the event,
         * true otherwise).
         *
         * In addition, since DROP TABLE FOR REPLICATION can result in the table not being
         * dropped, while DROP TABLE always drops the table and its partitions, DROP TABLE FOR
         * REPLICATION must do one more thing: if it does not drop the table because the table is
         * in a newer state, it must still drop the partitions inside it that are older than this
         * event. In effect, DROP TABLE FOR REPLICATION acts like a recursive DROP TABLE IF OLDER.
         */
        if (!replicationSpec.allowEventReplacementInto(tbl.getParameters())) {
            // The table is newer than this event, so leave it be, but drop any
            // partitions inside it that are older than the event.
            if (tbl.isPartitioned()) {
                PartitionIterable partitions = new PartitionIterable(db, tbl, null, conf.getIntVar(HiveConf.ConfVars.METASTORE_BATCH_RETRIEVE_MAX));
                for (Partition p : Iterables.filter(partitions, replicationSpec.allowEventReplacementInto())) {
                    db.dropPartition(tbl.getDbName(), tbl.getTableName(), p.getValues(), true);
                }
            }
            LOG.debug("DDLTask: Drop Table is skipped as table {} is newer than update", dropTbl.getTableName());
            // table is newer, leave it be.
            return;
        }
    }
    // drop the table
    db.dropTable(dropTbl.getTableName(), dropTbl.getIfPurge());
    if (tbl != null) {
        // Remove from cache if it is a materialized view
        if (tbl.isMaterializedView()) {
            HiveMaterializedViewsRegistry.get().dropMaterializedView(tbl);
        }
        // We have already locked the table in DDLSemanticAnalyzer, don't do it again here
        addIfAbsentByName(new WriteEntity(tbl, WriteEntity.WriteType.DDL_NO_LOCK));
    }
}
Also used: Partition (org.apache.hadoop.hive.ql.metadata.Partition), AlterTableExchangePartition (org.apache.hadoop.hive.ql.plan.AlterTableExchangePartition), ReplicationSpec (org.apache.hadoop.hive.ql.parse.ReplicationSpec), PartitionIterable (org.apache.hadoop.hive.ql.metadata.PartitionIterable), HiveException (org.apache.hadoop.hive.ql.metadata.HiveException), WriteEntity (org.apache.hadoop.hive.ql.hooks.WriteEntity)
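
The per-partition cleanup above uses Iterables.filter(partitions, replicationSpec.allowEventReplacementInto()), i.e. a Guava Predicate form of the same ordering check applied to each partition's own parameters. A minimal sketch of that filtering pattern follows; the Part record and the "repl.last.id" key are hypothetical stand-ins for Hive's Partition metadata.

import com.google.common.base.Predicate;
import com.google.common.collect.Iterables;

import java.util.List;
import java.util.Map;

public class PartitionFilterSketch {

    // Hypothetical minimal partition holder standing in for
    // org.apache.hadoop.hive.ql.metadata.Partition.
    record Part(String name, Map<String, String> parameters) { }

    // Builds a predicate admitting only partitions whose recorded replication
    // state is older than the given event (mirroring the table-level check).
    static Predicate<Part> allowEventReplacementInto(long eventId) {
        return p -> {
            String last = p.parameters().get("repl.last.id"); // assumed state key
            return last == null || Long.parseLong(last) < eventId;
        };
    }

    public static void main(String[] args) {
        List<Part> parts = List.of(
                new Part("p1", Map.of("repl.last.id", "90")),
                new Part("p2", Map.of("repl.last.id", "150")));
        // Only p1 predates event 100, so only p1 is offered for dropping.
        for (Part p : Iterables.filter(parts, allowEventReplacementInto(100))) {
            System.out.println("would drop " + p.name());
        }
    }
}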

Example 3 with ReplicationSpec

Use of org.apache.hadoop.hive.ql.parse.ReplicationSpec in project hive by apache.

From class DropTableOperation, method execute.

@Override
public int execute() throws HiveException {
    Table table = getTable();
    if (table == null) {
        // dropping a non-existent table is handled by DropTableAnalyzer
        return 0;
    }
    if (desc.getValidationRequired()) {
        if (table.isView() || table.isMaterializedView()) {
            if (desc.isIfExists()) {
                return 0;
            } else if (table.isView()) {
                throw new HiveException("Cannot drop a view with DROP TABLE");
            } else {
                throw new HiveException("Cannot drop a materialized view with DROP TABLE");
            }
        }
    }
    ReplicationSpec replicationSpec = desc.getReplicationSpec();
    if (replicationSpec.isInReplicationScope()) {
        /**
         * DROP TABLE FOR REPLICATION behaves differently from DROP TABLE IF EXISTS: it more
         * closely matches a DROP TABLE IF OLDER THAN (x) semantic.
         *
         * Commands executed under the scope of replication need to be idempotent and resilient
         * to repeats. A worker processing a replication task may be presumed abandoned for not
         * returning in time, yet still execute its task later, and that late execution must not
         * clobber data written after it. For example, if we create partition P1, then drop it,
         * then create it yet again, replaying that drop must not remove the newer partition if
         * the replay runs after the destination object is already in the newer state.
         *
         * Thus, we check replicationSpec.allowEventReplacementInto to determine whether we can
         * drop the object in question (it returns false if the object is newer than the event,
         * true otherwise).
         *
         * In addition, since DROP TABLE FOR REPLICATION can result in the table not being
         * dropped, while DROP TABLE always drops the table and its partitions, DROP TABLE FOR
         * REPLICATION must do one more thing: if it does not drop the table because the table is
         * in a newer state, it must still drop the partitions inside it that are older than this
         * event. In effect, DROP TABLE FOR REPLICATION acts like a recursive DROP TABLE IF OLDER.
         */
        Map<String, String> dbParams = context.getDb().getDatabase(table.getDbName()).getParameters();
        if (!replicationSpec.allowEventReplacementInto(dbParams)) {
            // The table is newer than this event, so leave it be, but drop any
            // partitions inside it that are older than the event.
            if (table.isPartitioned()) {
                PartitionIterable partitions = new PartitionIterable(context.getDb(), table, null, MetastoreConf.getIntVar(context.getConf(), MetastoreConf.ConfVars.BATCH_RETRIEVE_MAX));
                for (Partition p : partitions) {
                    // Check each partition's own parameters; re-testing the database-level
                    // dbParams here would always be false, since it failed the check above.
                    if (replicationSpec.allowEventReplacementInto(p.getParameters())) {
                        context.getDb().dropPartition(table.getDbName(), table.getTableName(), p.getValues(), true);
                    }
                }
            }
            LOG.debug("DDLTask: Drop Table is skipped as table {} is newer than update", desc.getTableName());
            // table is newer, leave it be.
            return 0;
        }
    }
    // TODO: API w/catalog name
    context.getDb().dropTable(table, desc.isPurge());
    DDLUtils.addIfAbsentByName(new WriteEntity(table, WriteEntity.WriteType.DDL_NO_LOCK), context);
    if (LlapHiveUtils.isLlapMode(context.getConf())) {
        TableName tableName = HiveTableName.of(table);
        ProactiveEviction.Request.Builder llapEvictRequestBuilder = ProactiveEviction.Request.Builder.create();
        llapEvictRequestBuilder.addTable(tableName.getDb(), tableName.getTable());
        ProactiveEviction.evict(context.getConf(), llapEvictRequestBuilder.build());
    }
    return 0;
}
Also used: Partition (org.apache.hadoop.hive.ql.metadata.Partition), HiveTableName (org.apache.hadoop.hive.ql.parse.HiveTableName), TableName (org.apache.hadoop.hive.common.TableName), ReplicationSpec (org.apache.hadoop.hive.ql.parse.ReplicationSpec), PartitionIterable (org.apache.hadoop.hive.ql.metadata.PartitionIterable), Table (org.apache.hadoop.hive.ql.metadata.Table), HiveException (org.apache.hadoop.hive.ql.metadata.HiveException), WriteEntity (org.apache.hadoop.hive.ql.hooks.WriteEntity)

Example 4 with ReplicationSpec

Use of org.apache.hadoop.hive.ql.parse.ReplicationSpec in project hive by apache.

From class DropTableAnalyzer, method analyzeInternal.

@Override
public void analyzeInternal(ASTNode root) throws SemanticException {
    String tableName = getUnescapedName((ASTNode) root.getChild(0));
    boolean ifExists = (root.getFirstChildWithType(HiveParser.TOK_IFEXISTS) != null);
    boolean throwException = !ifExists && !HiveConf.getBoolVar(conf, ConfVars.DROP_IGNORES_NON_EXISTENT);
    Table table = getTable(tableName, throwException);
    if (table != null) {
        inputs.add(new ReadEntity(table));
        boolean tableWithSuffix = (HiveConf.getBoolVar(conf, ConfVars.HIVE_ACID_CREATE_TABLE_USE_SUFFIX) || HiveConf.getBoolVar(conf, ConfVars.HIVE_ACID_LOCKLESS_READS_ENABLED)) && AcidUtils.isTransactionalTable(table) && Boolean.parseBoolean(table.getProperty(SOFT_DELETE_TABLE));
        outputs.add(new WriteEntity(table, tableWithSuffix ? WriteType.DDL_EXCL_WRITE : WriteType.DDL_EXCLUSIVE));
    }
    boolean purge = (root.getFirstChildWithType(HiveParser.KW_PURGE) != null);
    ReplicationSpec replicationSpec = new ReplicationSpec(root);
    DropTableDesc desc = new DropTableDesc(tableName, ifExists, purge, replicationSpec);
    rootTasks.add(TaskFactory.get(new DDLWork(getInputs(), getOutputs(), desc)));
}
Also used: ReadEntity (org.apache.hadoop.hive.ql.hooks.ReadEntity), ReplicationSpec (org.apache.hadoop.hive.ql.parse.ReplicationSpec), Table (org.apache.hadoop.hive.ql.metadata.Table), DDLWork (org.apache.hadoop.hive.ql.ddl.DDLWork), WriteEntity (org.apache.hadoop.hive.ql.hooks.WriteEntity)
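
The tableWithSuffix expression above compresses the whole lock decision into one line. As a readable restatement, here is a small sketch of the same choice, on the assumption (suggested by the snippet) that a transactional table dropped via the soft-delete path keeps its data directory under a suffixed name, so lockless readers are unaffected and the weaker DDL_EXCL_WRITE lock suffices; the enum and helper are illustrative, not Hive's API.

public class LockChoiceSketch {

    enum WriteType { DDL_EXCL_WRITE, DDL_EXCLUSIVE }

    // Soft delete keeps the dropped table's directory under a suffixed name,
    // so concurrent lockless readers are unaffected by the drop and a fully
    // exclusive lock is not required.
    static WriteType lockFor(boolean suffixFeatureEnabled, boolean transactional, boolean softDelete) {
        boolean tableWithSuffix = suffixFeatureEnabled && transactional && softDelete;
        return tableWithSuffix ? WriteType.DDL_EXCL_WRITE : WriteType.DDL_EXCLUSIVE;
    }

    public static void main(String[] args) {
        System.out.println(lockFor(true, true, true));   // DDL_EXCL_WRITE
        System.out.println(lockFor(true, false, true));  // DDL_EXCLUSIVE
    }
}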

Example 5 with ReplicationSpec

Use of org.apache.hadoop.hive.ql.parse.ReplicationSpec in project hive by apache.

From class TruncateTableOperation, method execute.

@Override
public int execute() throws HiveException {
    if (desc.getColumnIndexes() != null) {
        ColumnTruncateWork truncateWork = new ColumnTruncateWork(desc.getColumnIndexes(), desc.getInputDir(), desc.getOutputDir());
        truncateWork.setListBucketingCtx(desc.getLbCtx());
        truncateWork.setMapperCannotSpanPartns(true);
        TaskQueue taskQueue = new TaskQueue();
        ColumnTruncateTask taskExec = new ColumnTruncateTask();
        taskExec.initialize(context.getQueryState(), null, taskQueue, null);
        taskExec.setWork(truncateWork);
        taskExec.setQueryPlan(context.getQueryPlan());
        int ret = taskExec.execute();
        if (taskExec.getException() != null) {
            context.getTask().setException(taskExec.getException());
        }
        return ret;
    }
    String tableName = desc.getTableName();
    Map<String, String> partSpec = desc.getPartSpec();
    ReplicationSpec replicationSpec = desc.getReplicationSpec();
    if (!DDLUtils.allowOperationInReplicationScope(context.getDb(), tableName, partSpec, replicationSpec)) {
        // No truncate: either the table is missing due to a drop/rename that follows the
        // truncate in the event stream, or the existing table is newer than our update.
        LOG.debug("DDLTask: Truncate Table/Partition is skipped as table {} / partition {} is newer than update", tableName, (partSpec == null) ? "null" : FileUtils.makePartName(new ArrayList<>(partSpec.keySet()), new ArrayList<>(partSpec.values())));
        return 0;
    }
    try {
        context.getDb().truncateTable(tableName, partSpec, replicationSpec != null && replicationSpec.isInReplicationScope() ? desc.getWriteId() : 0L);
    } catch (Exception e) {
        throw new HiveException(e, ErrorMsg.GENERIC_ERROR);
    }
    return 0;
}
Also used: ReplicationSpec (org.apache.hadoop.hive.ql.parse.ReplicationSpec), HiveException (org.apache.hadoop.hive.ql.metadata.HiveException), ColumnTruncateTask (org.apache.hadoop.hive.ql.io.rcfile.truncate.ColumnTruncateTask), TaskQueue (org.apache.hadoop.hive.ql.TaskQueue), ColumnTruncateWork (org.apache.hadoop.hive.ql.io.rcfile.truncate.ColumnTruncateWork)
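
Unlike the drop examples, the truncate path delegates its replication gating to DDLUtils.allowOperationInReplicationScope. Below is a minimal sketch of what such a gate has to decide, under the assumption that it permits the operation outside replication scope, and inside it only when the target still exists and is older than the event; the MetaStore interface and state key are hypothetical.

import java.util.Map;

public class ReplScopeGateSketch {

    // Hypothetical stand-in for the metastore lookup; returns null if the
    // table is missing (e.g. dropped or renamed after the truncate event).
    interface MetaStore {
        Map<String, String> tableParams(String tableName);
    }

    // Illustrative gate: outside replication scope everything is allowed;
    // inside it, the target must still exist and be older than the event.
    static boolean allowOperationInReplicationScope(MetaStore db, String tableName,
                                                    boolean inReplScope, long eventId) {
        if (!inReplScope) {
            return true;
        }
        Map<String, String> params = db.tableParams(tableName);
        if (params == null) {
            return false; // target vanished; a later drop/rename wins
        }
        String last = params.get("repl.last.id"); // assumed replication state key
        return last == null || Long.parseLong(last) < eventId;
    }
}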

Aggregations

ReplicationSpec (org.apache.hadoop.hive.ql.parse.ReplicationSpec): 24
HiveException (org.apache.hadoop.hive.ql.metadata.HiveException): 11
SemanticException (org.apache.hadoop.hive.ql.parse.SemanticException): 9
Table (org.apache.hadoop.hive.ql.metadata.Table): 8
DDLWork (org.apache.hadoop.hive.ql.ddl.DDLWork): 7
Database (org.apache.hadoop.hive.metastore.api.Database): 6
WriteEntity (org.apache.hadoop.hive.ql.hooks.WriteEntity): 6
ArrayList (java.util.ArrayList): 5
Partition (org.apache.hadoop.hive.ql.metadata.Partition): 5
IOException (java.io.IOException): 4
ReadEntity (org.apache.hadoop.hive.ql.hooks.ReadEntity): 4
InvalidTableException (org.apache.hadoop.hive.ql.metadata.InvalidTableException): 4
Path (org.apache.hadoop.fs.Path): 3
TableName (org.apache.hadoop.hive.common.TableName): 3
NoSuchObjectException (org.apache.hadoop.hive.metastore.api.NoSuchObjectException): 3
Task (org.apache.hadoop.hive.ql.exec.Task): 3
FileNotFoundException (java.io.FileNotFoundException): 2
HashMap (java.util.HashMap): 2
LinkedHashMap (java.util.LinkedHashMap): 2
MetaException (org.apache.hadoop.hive.metastore.api.MetaException): 2