
Example 71 with WriteEntity

Use of org.apache.hadoop.hive.ql.hooks.WriteEntity in project hive by apache.

In the class DDLSemanticAnalyzer, the method analyzeTruncateTable:

private void analyzeTruncateTable(ASTNode ast) throws SemanticException {
    // TOK_TABLE_PARTITION
    ASTNode root = (ASTNode) ast.getChild(0);
    String tableName = getUnescapedName((ASTNode) root.getChild(0));
    Table table = getTable(tableName, true);
    if (table.getTableType() != TableType.MANAGED_TABLE) {
        throw new SemanticException(ErrorMsg.TRUNCATE_FOR_NON_MANAGED_TABLE.format(tableName));
    }
    if (table.isNonNative()) {
        // TODO
        throw new SemanticException(ErrorMsg.TRUNCATE_FOR_NON_NATIVE_TABLE.format(tableName));
    }
    if (!table.isPartitioned() && root.getChildCount() > 1) {
        throw new SemanticException(ErrorMsg.PARTSPEC_FOR_NON_PARTITIONED_TABLE.format(tableName));
    }
    Map<String, String> partSpec = getPartSpec((ASTNode) root.getChild(1));
    if (partSpec == null) {
        if (!table.isPartitioned()) {
            outputs.add(new WriteEntity(table, WriteEntity.WriteType.DDL_EXCLUSIVE));
        } else {
            for (Partition partition : getPartitions(table, null, false)) {
                outputs.add(new WriteEntity(partition, WriteEntity.WriteType.DDL_EXCLUSIVE));
            }
        }
    } else {
        if (isFullSpec(table, partSpec)) {
            validatePartSpec(table, partSpec, (ASTNode) root.getChild(1), conf, true);
            Partition partition = getPartition(table, partSpec, true);
            outputs.add(new WriteEntity(partition, WriteEntity.WriteType.DDL_EXCLUSIVE));
        } else {
            validatePartSpec(table, partSpec, (ASTNode) root.getChild(1), conf, false);
            for (Partition partition : getPartitions(table, partSpec, false)) {
                outputs.add(new WriteEntity(partition, WriteEntity.WriteType.DDL_EXCLUSIVE));
            }
        }
    }
    TruncateTableDesc truncateTblDesc = new TruncateTableDesc(tableName, partSpec, null);
    DDLWork ddlWork = new DDLWork(getInputs(), getOutputs(), truncateTblDesc);
    Task<? extends Serializable> truncateTask = TaskFactory.get(ddlWork);
    // Is this a truncate column command
    List<String> columnNames = null;
    if (ast.getChildCount() == 2) {
        try {
            columnNames = getColumnNames((ASTNode) ast.getChild(1));
            // It would be possible to support this, but this is such a pointless command.
            if (AcidUtils.isInsertOnlyTable(table.getParameters())) {
                throw new SemanticException("Truncating MM table columns not presently supported");
            }
            List<String> bucketCols = null;
            Class<? extends InputFormat> inputFormatClass = null;
            boolean isArchived = false;
            Path newTblPartLoc = null;
            Path oldTblPartLoc = null;
            List<FieldSchema> cols = null;
            ListBucketingCtx lbCtx = null;
            boolean isListBucketed = false;
            List<String> listBucketColNames = null;
            if (table.isPartitioned()) {
                Partition part = db.getPartition(table, partSpec, false);
                Path tabPath = table.getPath();
                Path partPath = part.getDataLocation();
                // if the table is in a different dfs than the partition,
                // replace the partition's dfs with the table's dfs.
                newTblPartLoc = new Path(tabPath.toUri().getScheme(), tabPath.toUri().getAuthority(), partPath.toUri().getPath());
                oldTblPartLoc = partPath;
                cols = part.getCols();
                bucketCols = part.getBucketCols();
                inputFormatClass = part.getInputFormatClass();
                isArchived = ArchiveUtils.isArchived(part);
                lbCtx = constructListBucketingCtx(part.getSkewedColNames(), part.getSkewedColValues(), part.getSkewedColValueLocationMaps(), part.isStoredAsSubDirectories(), conf);
                isListBucketed = part.isStoredAsSubDirectories();
                listBucketColNames = part.getSkewedColNames();
            } else {
                // input and output are the same
                oldTblPartLoc = table.getPath();
                newTblPartLoc = table.getPath();
                cols = table.getCols();
                bucketCols = table.getBucketCols();
                inputFormatClass = table.getInputFormatClass();
                lbCtx = constructListBucketingCtx(table.getSkewedColNames(), table.getSkewedColValues(), table.getSkewedColValueLocationMaps(), table.isStoredAsSubDirectories(), conf);
                isListBucketed = table.isStoredAsSubDirectories();
                listBucketColNames = table.getSkewedColNames();
            }
            // throw a HiveException for non-rcfile.
            if (!inputFormatClass.equals(RCFileInputFormat.class)) {
                throw new SemanticException(ErrorMsg.TRUNCATE_COLUMN_NOT_RC.getMsg());
            }
            // throw a HiveException if the table/partition is archived
            if (isArchived) {
                throw new SemanticException(ErrorMsg.TRUNCATE_COLUMN_ARCHIVED.getMsg());
            }
            Set<Integer> columnIndexes = new HashSet<Integer>();
            for (String columnName : columnNames) {
                boolean found = false;
                for (int columnIndex = 0; columnIndex < cols.size(); columnIndex++) {
                    if (columnName.equalsIgnoreCase(cols.get(columnIndex).getName())) {
                        columnIndexes.add(columnIndex);
                        found = true;
                        break;
                    }
                }
                // Throw an exception if the user is trying to truncate a column which doesn't exist
                if (!found) {
                    throw new SemanticException(ErrorMsg.INVALID_COLUMN.getMsg(columnName));
                }
                // Throw an exception if the table/partition is bucketed on one of the columns
                for (String bucketCol : bucketCols) {
                    if (bucketCol.equalsIgnoreCase(columnName)) {
                        throw new SemanticException(ErrorMsg.TRUNCATE_BUCKETED_COLUMN.getMsg(columnName));
                    }
                }
                if (isListBucketed) {
                    for (String listBucketCol : listBucketColNames) {
                        if (listBucketCol.equalsIgnoreCase(columnName)) {
                            throw new SemanticException(ErrorMsg.TRUNCATE_LIST_BUCKETED_COLUMN.getMsg(columnName));
                        }
                    }
                }
            }
            truncateTblDesc.setColumnIndexes(new ArrayList<Integer>(columnIndexes));
            truncateTblDesc.setInputDir(oldTblPartLoc);
            truncateTblDesc.setLbCtx(lbCtx);
            addInputsOutputsAlterTable(tableName, partSpec, AlterTableTypes.TRUNCATE);
            ddlWork.setNeedLock(true);
            TableDesc tblDesc = Utilities.getTableDesc(table);
            // Write the output to temporary directory and move it to the final location at the end
            // so the operation is atomic.
            Path queryTmpdir = ctx.getExternalTmpPath(newTblPartLoc);
            truncateTblDesc.setOutputDir(queryTmpdir);
            LoadTableDesc ltd = new LoadTableDesc(queryTmpdir, tblDesc, partSpec == null ? new HashMap<>() : partSpec);
            ltd.setLbCtx(lbCtx);
            @SuppressWarnings("unchecked") Task<MoveWork> moveTsk = TaskFactory.get(new MoveWork(null, null, ltd, null, false));
            truncateTask.addDependentTask(moveTsk);
            // Recalculate the HDFS stats if auto gather stats is set
            if (conf.getBoolVar(HiveConf.ConfVars.HIVESTATSAUTOGATHER)) {
                BasicStatsWork basicStatsWork;
                if (oldTblPartLoc.equals(newTblPartLoc)) {
                    // If we're merging to the same location, we can avoid some metastore calls
                    TableSpec tablepart = new TableSpec(this.db, conf, root);
                    basicStatsWork = new BasicStatsWork(tablepart);
                } else {
                    basicStatsWork = new BasicStatsWork(ltd);
                }
                basicStatsWork.setNoStatsAggregator(true);
                basicStatsWork.setClearAggregatorStats(true);
                StatsWork columnStatsWork = new StatsWork(table, basicStatsWork, conf);
                Task<? extends Serializable> statTask = TaskFactory.get(columnStatsWork);
                moveTsk.addDependentTask(statTask);
            }
        } catch (HiveException e) {
            throw new SemanticException(e);
        }
    }
    rootTasks.add(truncateTask);
}
Also used : MoveWork(org.apache.hadoop.hive.ql.plan.MoveWork) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) LinkedHashMap(java.util.LinkedHashMap) HashMap(java.util.HashMap) FieldSchema(org.apache.hadoop.hive.metastore.api.FieldSchema) StatsWork(org.apache.hadoop.hive.ql.plan.StatsWork) BasicStatsWork(org.apache.hadoop.hive.ql.plan.BasicStatsWork) ListBucketingCtx(org.apache.hadoop.hive.ql.plan.ListBucketingCtx) WriteEntity(org.apache.hadoop.hive.ql.hooks.WriteEntity) HashSet(java.util.HashSet) Path(org.apache.hadoop.fs.Path) Partition(org.apache.hadoop.hive.ql.metadata.Partition) AlterTableExchangePartition(org.apache.hadoop.hive.ql.plan.AlterTableExchangePartition) Table(org.apache.hadoop.hive.ql.metadata.Table) TruncateTableDesc(org.apache.hadoop.hive.ql.plan.TruncateTableDesc) SQLUniqueConstraint(org.apache.hadoop.hive.metastore.api.SQLUniqueConstraint) NotNullConstraint(org.apache.hadoop.hive.ql.metadata.NotNullConstraint) DefaultConstraint(org.apache.hadoop.hive.ql.metadata.DefaultConstraint) SQLCheckConstraint(org.apache.hadoop.hive.metastore.api.SQLCheckConstraint) SQLNotNullConstraint(org.apache.hadoop.hive.metastore.api.SQLNotNullConstraint) SQLDefaultConstraint(org.apache.hadoop.hive.metastore.api.SQLDefaultConstraint) LoadTableDesc(org.apache.hadoop.hive.ql.plan.LoadTableDesc) DDLWork(org.apache.hadoop.hive.ql.plan.DDLWork) RCFileInputFormat(org.apache.hadoop.hive.ql.io.RCFileInputFormat) DescTableDesc(org.apache.hadoop.hive.ql.plan.DescTableDesc) AlterTableDesc(org.apache.hadoop.hive.ql.plan.AlterTableDesc) UnlockTableDesc(org.apache.hadoop.hive.ql.plan.UnlockTableDesc) DropTableDesc(org.apache.hadoop.hive.ql.plan.DropTableDesc) ShowCreateTableDesc(org.apache.hadoop.hive.ql.plan.ShowCreateTableDesc) TableDesc(org.apache.hadoop.hive.ql.plan.TableDesc) LockTableDesc(org.apache.hadoop.hive.ql.plan.LockTableDesc)
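
The part of analyzeTruncateTable that is easy to lose in the surrounding detail is how the exclusive locks are scoped: an unpartitioned table gets a single table-level WriteEntity, while a partitioned table gets one DDL_EXCLUSIVE WriteEntity per affected partition. Below is a minimal standalone sketch of just that decision; the TruncateOutputs helper is hypothetical (not part of Hive) and assumes the affected partitions have already been resolved.

import java.util.List;
import java.util.Set;

import org.apache.hadoop.hive.ql.hooks.WriteEntity;
import org.apache.hadoop.hive.ql.metadata.Partition;
import org.apache.hadoop.hive.ql.metadata.Table;

// Hypothetical helper, not part of Hive: mirrors how analyzeTruncateTable scopes its
// exclusive locks, taking one WriteEntity for the whole table when it is unpartitioned
// and otherwise one per partition being truncated.
final class TruncateOutputs {

    static void add(Table table, List<Partition> affectedPartitions, Set<WriteEntity> outputs) {
        if (!table.isPartitioned()) {
            outputs.add(new WriteEntity(table, WriteEntity.WriteType.DDL_EXCLUSIVE));
            return;
        }
        for (Partition partition : affectedPartitions) {
            outputs.add(new WriteEntity(partition, WriteEntity.WriteType.DDL_EXCLUSIVE));
        }
    }
}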

Example 72 with WriteEntity

Use of org.apache.hadoop.hive.ql.hooks.WriteEntity in project hive by apache.

In the class DDLSemanticAnalyzer, the method analyzeExchangePartition:

private void analyzeExchangePartition(String[] qualified, ASTNode ast) throws SemanticException {
    Table destTable = getTable(qualified);
    Table sourceTable = getTable(getUnescapedName((ASTNode) ast.getChild(1)));
    // Get the partition specs
    Map<String, String> partSpecs = getValidatedPartSpec(sourceTable, (ASTNode) ast.getChild(0), conf, false);
    validatePartitionValues(partSpecs);
    boolean sameColumns = MetaStoreUtils.compareFieldColumns(destTable.getAllCols(), sourceTable.getAllCols());
    boolean samePartitions = MetaStoreUtils.compareFieldColumns(destTable.getPartitionKeys(), sourceTable.getPartitionKeys());
    if (!sameColumns || !samePartitions) {
        throw new SemanticException(ErrorMsg.TABLES_INCOMPATIBLE_SCHEMAS.getMsg());
    }
    // Exchange partition is not allowed with transactional tables: the moved files carry
    // write IDs that may not be valid in the destination, and it may affect snapshot isolation for on-going txns as well.
    if (AcidUtils.isTransactionalTable(sourceTable) || AcidUtils.isTransactionalTable(destTable)) {
        throw new SemanticException(ErrorMsg.EXCHANGE_PARTITION_NOT_ALLOWED_WITH_TRANSACTIONAL_TABLES.getMsg());
    }
    // check if source partition exists
    getPartitions(sourceTable, partSpecs, true);
    // Verify that the partitions specified are continuous
    // If a subpartition value is specified without specifying a partition's value
    // then we throw an exception
    int counter = isPartitionValueContinuous(sourceTable.getPartitionKeys(), partSpecs);
    if (counter < 0) {
        throw new SemanticException(ErrorMsg.PARTITION_VALUE_NOT_CONTINUOUS.getMsg(partSpecs.toString()));
    }
    List<Partition> destPartitions = null;
    try {
        destPartitions = getPartitions(destTable, partSpecs, true);
    } catch (SemanticException ex) {
    // We should expect a semantic exception being thrown as this partition
    // should not be present.
    }
    if (destPartitions != null) {
        // If any destination partition is present then throw a Semantic Exception.
        throw new SemanticException(ErrorMsg.PARTITION_EXISTS.getMsg(destPartitions.toString()));
    }
    AlterTableExchangePartition alterTableExchangePartition = new AlterTableExchangePartition(sourceTable, destTable, partSpecs);
    rootTasks.add(TaskFactory.get(new DDLWork(getInputs(), getOutputs(), alterTableExchangePartition)));
    inputs.add(new ReadEntity(sourceTable));
    outputs.add(new WriteEntity(destTable, WriteType.DDL_SHARED));
}
Also used : Partition(org.apache.hadoop.hive.ql.metadata.Partition) AlterTableExchangePartition(org.apache.hadoop.hive.ql.plan.AlterTableExchangePartition) Table(org.apache.hadoop.hive.ql.metadata.Table) SQLUniqueConstraint(org.apache.hadoop.hive.metastore.api.SQLUniqueConstraint) NotNullConstraint(org.apache.hadoop.hive.ql.metadata.NotNullConstraint) DefaultConstraint(org.apache.hadoop.hive.ql.metadata.DefaultConstraint) SQLCheckConstraint(org.apache.hadoop.hive.metastore.api.SQLCheckConstraint) SQLNotNullConstraint(org.apache.hadoop.hive.metastore.api.SQLNotNullConstraint) SQLDefaultConstraint(org.apache.hadoop.hive.metastore.api.SQLDefaultConstraint) ReadEntity(org.apache.hadoop.hive.ql.hooks.ReadEntity) DDLWork(org.apache.hadoop.hive.ql.plan.DDLWork) WriteEntity(org.apache.hadoop.hive.ql.hooks.WriteEntity)
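
The comment about continuous partition values corresponds to the rule enforced by isPartitionValueContinuous: a value may only be supplied for a partition key if values are also supplied for every key before it. A rough standalone sketch of that rule under those assumed semantics follows; PartSpecContinuity and its return convention are illustrative, not the Hive implementation.

import java.util.List;
import java.util.Map;

import org.apache.hadoop.hive.metastore.api.FieldSchema;

// Illustrative check, assumed semantics only: the supplied partition spec must cover a
// prefix of the table's partition keys. Returns the number of leading keys covered, or
// -1 if a later key is specified while an earlier one is missing.
final class PartSpecContinuity {

    static int leadingKeysCovered(List<FieldSchema> partitionKeys, Map<String, String> partSpec) {
        int covered = 0;
        boolean gapSeen = false;
        for (FieldSchema key : partitionKeys) {
            if (partSpec.containsKey(key.getName())) {
                if (gapSeen) {
                    // e.g. the spec gives a value for hr but not for the preceding ds key
                    return -1;
                }
                covered++;
            } else {
                gapSeen = true;
            }
        }
        return covered;
    }
}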

Example 73 with WriteEntity

Use of org.apache.hadoop.hive.ql.hooks.WriteEntity in project hive by apache.

In the class DDLSemanticAnalyzer, the method addAlterDbDesc:

private void addAlterDbDesc(AlterDatabaseDesc alterDesc) throws SemanticException {
    Database database = getDatabase(alterDesc.getDatabaseName());
    outputs.add(new WriteEntity(database, WriteEntity.WriteType.DDL_NO_LOCK));
    rootTasks.add(TaskFactory.get(new DDLWork(getInputs(), getOutputs(), alterDesc)));
}
Also used : DDLWork(org.apache.hadoop.hive.ql.plan.DDLWork) Database(org.apache.hadoop.hive.metastore.api.Database) WriteEntity(org.apache.hadoop.hive.ql.hooks.WriteEntity)

Example 74 with WriteEntity

Use of org.apache.hadoop.hive.ql.hooks.WriteEntity in project hive by apache.

In the class DDLSemanticAnalyzer, the method analyzeAlterMaterializedViewRewrite:

private void analyzeAlterMaterializedViewRewrite(String mvName, ASTNode ast) throws SemanticException {
    // Value for the flag
    boolean enableFlag;
    switch(ast.getChild(0).getType()) {
        case HiveParser.TOK_REWRITE_ENABLED:
            enableFlag = true;
            break;
        case HiveParser.TOK_REWRITE_DISABLED:
            enableFlag = false;
            break;
        default:
            throw new SemanticException("Invalid alter materialized view expression");
    }
    AlterMaterializedViewDesc alterMVDesc = new AlterMaterializedViewDesc(AlterMaterializedViewTypes.UPDATE_REWRITE_FLAG);
    alterMVDesc.setMaterializedViewName(mvName);
    alterMVDesc.setRewriteEnableFlag(enableFlag);
    // It can be fully qualified name or use default database
    Table materializedViewTable = getTable(mvName, true);
    // If the rewrite is being enabled, check that the materialized view definition
    // only uses transactional (MM and ACID) tables
    if (enableFlag) {
        for (String tableName : materializedViewTable.getCreationMetadata().getTablesUsed()) {
            Table table = getTable(tableName, true);
            if (!AcidUtils.isTransactionalTable(table)) {
                throw new SemanticException("Automatic rewriting for materialized view cannot " + "be enabled if the materialized view uses non-transactional tables");
            }
        }
    }
    inputs.add(new ReadEntity(materializedViewTable));
    outputs.add(new WriteEntity(materializedViewTable, WriteEntity.WriteType.DDL_EXCLUSIVE));
    rootTasks.add(TaskFactory.get(new DDLWork(getInputs(), getOutputs(), alterMVDesc)));
}
Also used : ReadEntity(org.apache.hadoop.hive.ql.hooks.ReadEntity) Table(org.apache.hadoop.hive.ql.metadata.Table) DDLWork(org.apache.hadoop.hive.ql.plan.DDLWork) WriteEntity(org.apache.hadoop.hive.ql.hooks.WriteEntity) AlterMaterializedViewDesc(org.apache.hadoop.hive.ql.plan.AlterMaterializedViewDesc)
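
The guard inside the enableFlag branch is the reusable idea here: rewriting can only be switched on when every table recorded in the materialized view's creation metadata is transactional. A hypothetical standalone extraction of just that check is sketched below; RewriteGuard is an invented name, and the error message is copied from the method above.

import java.util.Collection;

import org.apache.hadoop.hive.ql.io.AcidUtils;
import org.apache.hadoop.hive.ql.metadata.Table;
import org.apache.hadoop.hive.ql.parse.SemanticException;

// Hypothetical extraction of the guard above: enabling rewrite requires every table
// referenced by the materialized view to be transactional (MM or full ACID).
final class RewriteGuard {

    static void checkAllTransactional(Collection<Table> tablesUsed) throws SemanticException {
        for (Table table : tablesUsed) {
            if (!AcidUtils.isTransactionalTable(table)) {
                throw new SemanticException("Automatic rewriting for materialized view cannot "
                    + "be enabled if the materialized view uses non-transactional tables");
            }
        }
    }
}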

Example 75 with WriteEntity

Use of org.apache.hadoop.hive.ql.hooks.WriteEntity in project hive by apache.

In the class DDLSemanticAnalyzer, the method analyzeAltertableSkewedby:

/**
 * Analyze alter table's skewed table
 *
 * @param ast
 *          node
 * @throws SemanticException
 */
private void analyzeAltertableSkewedby(String[] qualified, ASTNode ast) throws SemanticException {
    /**
     * Throw an error if the user tries to use the DDL with
     * hive.internal.ddl.list.bucketing.enable set to false.
     */
    HiveConf hiveConf = SessionState.get().getConf();
    Table tab = getTable(qualified);
    inputs.add(new ReadEntity(tab));
    outputs.add(new WriteEntity(tab, WriteEntity.WriteType.DDL_EXCLUSIVE));
    validateAlterTableType(tab, AlterTableTypes.ADDSKEWEDBY);
    String tableName = getDotName(qualified);
    if (ast.getChildCount() == 0) {
        /* Convert a skewed table to non-skewed table. */
        AlterTableDesc alterTblDesc = new AlterTableDesc(tableName, true, new ArrayList<String>(), new ArrayList<List<String>>());
        alterTblDesc.setStoredAsSubDirectories(false);
        rootTasks.add(TaskFactory.get(new DDLWork(getInputs(), getOutputs(), alterTblDesc)));
    } else {
        switch(((ASTNode) ast.getChild(0)).getToken().getType()) {
            case HiveParser.TOK_TABLESKEWED:
                handleAlterTableSkewedBy(ast, tableName, tab);
                break;
            case HiveParser.TOK_STOREDASDIRS:
                handleAlterTableDisableStoredAsDirs(tableName, tab);
                break;
            default:
                assert false;
        }
    }
}
Also used : ReadEntity(org.apache.hadoop.hive.ql.hooks.ReadEntity) AlterTableDesc(org.apache.hadoop.hive.ql.plan.AlterTableDesc) Table(org.apache.hadoop.hive.ql.metadata.Table) DDLWork(org.apache.hadoop.hive.ql.plan.DDLWork) HiveConf(org.apache.hadoop.hive.conf.HiveConf) ArrayList(java.util.ArrayList) List(java.util.List) ImmutableList(com.google.common.collect.ImmutableList) LinkedList(java.util.LinkedList) WriteEntity(org.apache.hadoop.hive.ql.hooks.WriteEntity)

Aggregations

WriteEntity (org.apache.hadoop.hive.ql.hooks.WriteEntity): 88 usages
Table (org.apache.hadoop.hive.ql.metadata.Table): 39 usages
ReadEntity (org.apache.hadoop.hive.ql.hooks.ReadEntity): 35 usages
HiveException (org.apache.hadoop.hive.ql.metadata.HiveException): 24 usages
Partition (org.apache.hadoop.hive.ql.metadata.Partition): 24 usages
ArrayList (java.util.ArrayList): 18 usages
DDLWork (org.apache.hadoop.hive.ql.plan.DDLWork): 14 usages
Path (org.apache.hadoop.fs.Path): 13 usages
AlterTableExchangePartition (org.apache.hadoop.hive.ql.plan.AlterTableExchangePartition): 13 usages
Referenceable (org.apache.atlas.typesystem.Referenceable): 11 usages
Database (org.apache.hadoop.hive.metastore.api.Database): 11 usages
Test (org.junit.Test): 11 usages
QueryPlan (org.apache.hadoop.hive.ql.QueryPlan): 10 usages
HashMap (java.util.HashMap): 9 usages
LinkedHashMap (java.util.LinkedHashMap): 9 usages
Test (org.testng.annotations.Test): 9 usages
SQLCheckConstraint (org.apache.hadoop.hive.metastore.api.SQLCheckConstraint): 8 usages
SQLDefaultConstraint (org.apache.hadoop.hive.metastore.api.SQLDefaultConstraint): 8 usages
SQLNotNullConstraint (org.apache.hadoop.hive.metastore.api.SQLNotNullConstraint): 8 usages
SQLUniqueConstraint (org.apache.hadoop.hive.metastore.api.SQLUniqueConstraint): 8 usages
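
Taken together, the examples share one pattern: the analyzer registers ReadEntity and WriteEntity objects, wraps the work in a DDLWork built from those same inputs and outputs, and adds the task to rootTasks; the registered entities are then visible to lock management and to hooks. As a minimal illustration of the consumer side, the sketch below is a hypothetical post-execution hook (not from the Hive code base) that prints each WriteEntity a query produced together with its write type; such a hook would typically be wired in through hive.exec.post.hooks.

import org.apache.hadoop.hive.ql.hooks.ExecuteWithHookContext;
import org.apache.hadoop.hive.ql.hooks.HookContext;
import org.apache.hadoop.hive.ql.hooks.WriteEntity;

// Illustrative post-execution hook: the WriteEntity outputs registered by the
// analyzer methods above are what ends up in hookContext.getOutputs().
public class LogWriteEntitiesHook implements ExecuteWithHookContext {

    @Override
    public void run(HookContext hookContext) throws Exception {
        for (WriteEntity output : hookContext.getOutputs()) {
            // Each output carries the entity name plus the lock-relevant write type,
            // e.g. DDL_EXCLUSIVE for TRUNCATE or DDL_SHARED for EXCHANGE PARTITION.
            System.out.println(output.getName() + " -> " + output.getWriteType());
        }
    }
}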