
Example 91 with Partition

Use of org.apache.hadoop.hive.ql.metadata.Partition in project hive by apache.

From the class AlterPartitionHandler, method handle:

@Override
public void handle(Context withinContext) throws Exception {
    LOG.info("Processing#{} ALTER_PARTITION message : {}", fromEventId(), event.getMessage());
    Table qlMdTable = new Table(tableObject);
    if (!Utils.shouldReplicate(withinContext.replicationSpec, qlMdTable, withinContext.hiveConf)) {
        return;
    }
    if (Scenario.ALTER == scenario) {
        withinContext.replicationSpec.setIsMetadataOnly(true);
        List<Partition> partitions = new ArrayList<>();
        partitions.add(new Partition(qlMdTable, after));
        Path metaDataPath = new Path(withinContext.eventRoot, EximUtil.METADATA_NAME);
        EximUtil.createExportDump(metaDataPath.getFileSystem(withinContext.hiveConf), metaDataPath, qlMdTable, partitions, withinContext.replicationSpec, withinContext.hiveConf);
    }
    DumpMetaData dmd = withinContext.createDmd(this);
    dmd.setPayload(event.getMessage());
    dmd.write();
}
Also used : Path(org.apache.hadoop.fs.Path) Partition(org.apache.hadoop.hive.ql.metadata.Partition) Table(org.apache.hadoop.hive.ql.metadata.Table) DumpMetaData(org.apache.hadoop.hive.ql.parse.repl.load.DumpMetaData) ArrayList(java.util.ArrayList)
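
A minimal sketch of the wrapping step used above, assuming the Thrift metastore table and partition objects are already in hand. The class and method names below are illustrative, not part of the Hive source; only the Table(org.apache.hadoop.hive.metastore.api.Table) and Partition(Table, org.apache.hadoop.hive.metastore.api.Partition) constructors are taken from the example itself (lines "new Table(tableObject)" and "new Partition(qlMdTable, after)").

import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.metadata.Partition;
import org.apache.hadoop.hive.ql.metadata.Table;

class PartitionWrapSketch {
    // Hypothetical helper mirroring the handler above: wrap raw Thrift metastore objects
    // into the org.apache.hadoop.hive.ql.metadata wrappers used by the replication dump.
    static Partition wrap(org.apache.hadoop.hive.metastore.api.Table msTable,
                          org.apache.hadoop.hive.metastore.api.Partition msPartition) throws HiveException {
        Table qlMdTable = new Table(msTable);
        return new Partition(qlMdTable, msPartition);
    }
}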

Example 92 with Partition

Use of org.apache.hadoop.hive.ql.metadata.Partition in project hive by apache.

From the class InsertHandler, method handle:

@Override
public void handle(Context withinContext) throws Exception {
    if (withinContext.hiveConf.getBoolVar(HiveConf.ConfVars.REPL_DUMP_METADATA_ONLY)) {
        return;
    }
    InsertMessage insertMsg = deserializer.getInsertMessage(event.getMessage());
    org.apache.hadoop.hive.ql.metadata.Table qlMdTable = tableObject(insertMsg);
    if (!Utils.shouldReplicate(withinContext.replicationSpec, qlMdTable, withinContext.hiveConf)) {
        return;
    }
    List<Partition> qlPtns = null;
    if (qlMdTable.isPartitioned() && (null != insertMsg.getPtnObj())) {
        qlPtns = Collections.singletonList(partitionObject(qlMdTable, insertMsg));
    }
    Path metaDataPath = new Path(withinContext.eventRoot, EximUtil.METADATA_NAME);
    // Mark the replace type based on INSERT-INTO or INSERT_OVERWRITE operation
    withinContext.replicationSpec.setIsReplace(insertMsg.isReplace());
    EximUtil.createExportDump(metaDataPath.getFileSystem(withinContext.hiveConf), metaDataPath, qlMdTable, qlPtns, withinContext.replicationSpec, withinContext.hiveConf);
    Iterable<String> files = insertMsg.getFiles();
    if (files != null) {
        Path dataPath;
        if ((null == qlPtns) || qlPtns.isEmpty()) {
            dataPath = new Path(withinContext.eventRoot, EximUtil.DATA_PATH_NAME);
        } else {
            /*
             * Insert into/overwrite operation shall operate on one or more partitions or even partitions from multiple
             * tables. But, Insert event is generated for each partition to which the data is inserted. So, qlPtns list
             * will have only one entry.
             */
            assert (1 == qlPtns.size());
            dataPath = new Path(withinContext.eventRoot, qlPtns.get(0).getName());
        }
        // encoded filename/checksum of files, write into _files
        try (BufferedWriter fileListWriter = writer(withinContext, dataPath)) {
            for (String file : files) {
                fileListWriter.write(file + "\n");
            }
        }
    }
    LOG.info("Processing#{} INSERT message : {}", fromEventId(), event.getMessage());
    DumpMetaData dmd = withinContext.createDmd(this);
    dmd.setPayload(event.getMessage());
    dmd.write();
}
Also used : Path(org.apache.hadoop.fs.Path) Partition(org.apache.hadoop.hive.ql.metadata.Partition) InsertMessage(org.apache.hadoop.hive.metastore.messaging.InsertMessage) DumpMetaData(org.apache.hadoop.hive.ql.parse.repl.load.DumpMetaData) BufferedWriter(java.io.BufferedWriter)
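
The data side of the dump is just a newline-separated list of encoded file entries written under the event's data path. Below is a rough, self-contained stand-in for the handler's private writer(...) helper; the "_files" name and the direct use of FileSystem.create are assumptions for illustration, not taken from the snippet above.

import java.io.BufferedWriter;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.nio.charset.StandardCharsets;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

class FileListSketch {
    // Illustrative stand-in for the handler's writer(...) helper: open a list file under the
    // event's data path and append one encoded file/checksum entry per line.
    static void writeFileList(Configuration conf, Path dataPath, Iterable<String> files) throws IOException {
        FileSystem fs = dataPath.getFileSystem(conf);
        Path listPath = new Path(dataPath, "_files");   // assumed name of the file-list artifact
        try (BufferedWriter out = new BufferedWriter(
                new OutputStreamWriter(fs.create(listPath), StandardCharsets.UTF_8))) {
            for (String file : files) {
                out.write(file);
                out.newLine();
            }
        }
    }
}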

Example 93 with Partition

Use of org.apache.hadoop.hive.ql.metadata.Partition in project hive by apache.

From the class DDLSemanticAnalyzer, method analyzeCacheMetadata:

private void analyzeCacheMetadata(ASTNode ast) throws SemanticException {
    Table tbl = AnalyzeCommandUtils.getTable(ast, this);
    Map<String, String> partSpec = null;
    CacheMetadataDesc desc;
    // In 2 cases out of 3, we could pass the path and type directly to metastore...
    if (AnalyzeCommandUtils.isPartitionLevelStats(ast)) {
        partSpec = AnalyzeCommandUtils.getPartKeyValuePairsFromAST(tbl, ast, conf);
        Partition part = getPartition(tbl, partSpec, true);
        desc = new CacheMetadataDesc(tbl.getDbName(), tbl.getTableName(), part.getName());
        inputs.add(new ReadEntity(part));
    } else {
        // Should we get all partitions for a partitioned table?
        desc = new CacheMetadataDesc(tbl.getDbName(), tbl.getTableName(), tbl.isPartitioned());
        inputs.add(new ReadEntity(tbl));
    }
    rootTasks.add(TaskFactory.get(new DDLWork(getInputs(), getOutputs(), desc)));
}
Also used : ReadEntity(org.apache.hadoop.hive.ql.hooks.ReadEntity) Partition(org.apache.hadoop.hive.ql.metadata.Partition) AlterTableExchangePartition(org.apache.hadoop.hive.ql.plan.AlterTableExchangePartition) Table(org.apache.hadoop.hive.ql.metadata.Table) DDLWork(org.apache.hadoop.hive.ql.plan.DDLWork) CacheMetadataDesc(org.apache.hadoop.hive.ql.plan.CacheMetadataDesc)
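
In the partition-level branch, CacheMetadataDesc receives part.getName(), the partition name in key=value/key=value form. The snippet below only illustrates the shape of that name when built from an ordered partition spec; it is not Hive's implementation, which additionally escapes special characters in keys and values.

import java.util.LinkedHashMap;
import java.util.Map;

class PartitionNameSketch {
    // Illustration only: build a "key1=val1/key2=val2" style name from an ordered spec.
    // Hive's own Partition.getName() also escapes special characters.
    static String partName(LinkedHashMap<String, String> spec) {
        StringBuilder sb = new StringBuilder();
        for (Map.Entry<String, String> e : spec.entrySet()) {
            if (sb.length() > 0) {
                sb.append('/');
            }
            sb.append(e.getKey()).append('=').append(e.getValue());
        }
        return sb.toString();
    }
}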

Example 94 with Partition

Use of org.apache.hadoop.hive.ql.metadata.Partition in project hive by apache.

From the class DDLSemanticAnalyzer, method analyzeTruncateTable:

private void analyzeTruncateTable(ASTNode ast) throws SemanticException {
    // TOK_TABLE_PARTITION
    ASTNode root = (ASTNode) ast.getChild(0);
    String tableName = getUnescapedName((ASTNode) root.getChild(0));
    Table table = getTable(tableName, true);
    if (table.getTableType() != TableType.MANAGED_TABLE) {
        throw new SemanticException(ErrorMsg.TRUNCATE_FOR_NON_MANAGED_TABLE.format(tableName));
    }
    if (table.isNonNative()) {
        // TODO
        throw new SemanticException(ErrorMsg.TRUNCATE_FOR_NON_NATIVE_TABLE.format(tableName));
    }
    if (!table.isPartitioned() && root.getChildCount() > 1) {
        throw new SemanticException(ErrorMsg.PARTSPEC_FOR_NON_PARTITIONED_TABLE.format(tableName));
    }
    Map<String, String> partSpec = getPartSpec((ASTNode) root.getChild(1));
    if (partSpec == null) {
        if (!table.isPartitioned()) {
            outputs.add(new WriteEntity(table, WriteEntity.WriteType.DDL_EXCLUSIVE));
        } else {
            for (Partition partition : getPartitions(table, null, false)) {
                outputs.add(new WriteEntity(partition, WriteEntity.WriteType.DDL_EXCLUSIVE));
            }
        }
    } else {
        if (isFullSpec(table, partSpec)) {
            validatePartSpec(table, partSpec, (ASTNode) root.getChild(1), conf, true);
            Partition partition = getPartition(table, partSpec, true);
            outputs.add(new WriteEntity(partition, WriteEntity.WriteType.DDL_EXCLUSIVE));
        } else {
            validatePartSpec(table, partSpec, (ASTNode) root.getChild(1), conf, false);
            for (Partition partition : getPartitions(table, partSpec, false)) {
                outputs.add(new WriteEntity(partition, WriteEntity.WriteType.DDL_EXCLUSIVE));
            }
        }
    }
    TruncateTableDesc truncateTblDesc = new TruncateTableDesc(tableName, partSpec, null);
    DDLWork ddlWork = new DDLWork(getInputs(), getOutputs(), truncateTblDesc);
    Task<? extends Serializable> truncateTask = TaskFactory.get(ddlWork);
    // Is this a truncate column command
    List<String> columnNames = null;
    if (ast.getChildCount() == 2) {
        try {
            columnNames = getColumnNames((ASTNode) ast.getChild(1));
            // It would be possible to support this, but this is such a pointless command.
            if (AcidUtils.isInsertOnlyTable(table.getParameters())) {
                throw new SemanticException("Truncating MM table columns not presently supported");
            }
            List<String> bucketCols = null;
            Class<? extends InputFormat> inputFormatClass = null;
            boolean isArchived = false;
            Path newTblPartLoc = null;
            Path oldTblPartLoc = null;
            List<FieldSchema> cols = null;
            ListBucketingCtx lbCtx = null;
            boolean isListBucketed = false;
            List<String> listBucketColNames = null;
            if (table.isPartitioned()) {
                Partition part = db.getPartition(table, partSpec, false);
                Path tabPath = table.getPath();
                Path partPath = part.getDataLocation();
                // if the table is in a different dfs than the partition,
                // replace the partition's dfs with the table's dfs.
                newTblPartLoc = new Path(tabPath.toUri().getScheme(), tabPath.toUri().getAuthority(), partPath.toUri().getPath());
                oldTblPartLoc = partPath;
                cols = part.getCols();
                bucketCols = part.getBucketCols();
                inputFormatClass = part.getInputFormatClass();
                isArchived = ArchiveUtils.isArchived(part);
                lbCtx = constructListBucketingCtx(part.getSkewedColNames(), part.getSkewedColValues(), part.getSkewedColValueLocationMaps(), part.isStoredAsSubDirectories(), conf);
                isListBucketed = part.isStoredAsSubDirectories();
                listBucketColNames = part.getSkewedColNames();
            } else {
                // input and output are the same
                oldTblPartLoc = table.getPath();
                newTblPartLoc = table.getPath();
                cols = table.getCols();
                bucketCols = table.getBucketCols();
                inputFormatClass = table.getInputFormatClass();
                lbCtx = constructListBucketingCtx(table.getSkewedColNames(), table.getSkewedColValues(), table.getSkewedColValueLocationMaps(), table.isStoredAsSubDirectories(), conf);
                isListBucketed = table.isStoredAsSubDirectories();
                listBucketColNames = table.getSkewedColNames();
            }
            // throw a HiveException for non-rcfile.
            if (!inputFormatClass.equals(RCFileInputFormat.class)) {
                throw new SemanticException(ErrorMsg.TRUNCATE_COLUMN_NOT_RC.getMsg());
            }
            // throw a HiveException if the table/partition is archived
            if (isArchived) {
                throw new SemanticException(ErrorMsg.TRUNCATE_COLUMN_ARCHIVED.getMsg());
            }
            Set<Integer> columnIndexes = new HashSet<Integer>();
            for (String columnName : columnNames) {
                boolean found = false;
                for (int columnIndex = 0; columnIndex < cols.size(); columnIndex++) {
                    if (columnName.equalsIgnoreCase(cols.get(columnIndex).getName())) {
                        columnIndexes.add(columnIndex);
                        found = true;
                        break;
                    }
                }
                // Throw an exception if the user is trying to truncate a column which doesn't exist
                if (!found) {
                    throw new SemanticException(ErrorMsg.INVALID_COLUMN.getMsg(columnName));
                }
                // Throw an exception if the table/partition is bucketed on one of the columns
                for (String bucketCol : bucketCols) {
                    if (bucketCol.equalsIgnoreCase(columnName)) {
                        throw new SemanticException(ErrorMsg.TRUNCATE_BUCKETED_COLUMN.getMsg(columnName));
                    }
                }
                if (isListBucketed) {
                    for (String listBucketCol : listBucketColNames) {
                        if (listBucketCol.equalsIgnoreCase(columnName)) {
                            throw new SemanticException(ErrorMsg.TRUNCATE_LIST_BUCKETED_COLUMN.getMsg(columnName));
                        }
                    }
                }
            }
            truncateTblDesc.setColumnIndexes(new ArrayList<Integer>(columnIndexes));
            truncateTblDesc.setInputDir(oldTblPartLoc);
            truncateTblDesc.setLbCtx(lbCtx);
            addInputsOutputsAlterTable(tableName, partSpec, AlterTableTypes.TRUNCATE);
            ddlWork.setNeedLock(true);
            TableDesc tblDesc = Utilities.getTableDesc(table);
            // Write the output to temporary directory and move it to the final location at the end
            // so the operation is atomic.
            Path queryTmpdir = ctx.getExternalTmpPath(newTblPartLoc);
            truncateTblDesc.setOutputDir(queryTmpdir);
            LoadTableDesc ltd = new LoadTableDesc(queryTmpdir, tblDesc, partSpec == null ? new HashMap<>() : partSpec);
            ltd.setLbCtx(lbCtx);
            @SuppressWarnings("unchecked") Task<MoveWork> moveTsk = TaskFactory.get(new MoveWork(null, null, ltd, null, false));
            truncateTask.addDependentTask(moveTsk);
            // Recalculate the HDFS stats if auto gather stats is set
            if (conf.getBoolVar(HiveConf.ConfVars.HIVESTATSAUTOGATHER)) {
                BasicStatsWork basicStatsWork;
                if (oldTblPartLoc.equals(newTblPartLoc)) {
                    // If we're merging to the same location, we can avoid some metastore calls
                    TableSpec tablepart = new TableSpec(this.db, conf, root);
                    basicStatsWork = new BasicStatsWork(tablepart);
                } else {
                    basicStatsWork = new BasicStatsWork(ltd);
                }
                basicStatsWork.setNoStatsAggregator(true);
                basicStatsWork.setClearAggregatorStats(true);
                StatsWork columnStatsWork = new StatsWork(table, basicStatsWork, conf);
                Task<? extends Serializable> statTask = TaskFactory.get(columnStatsWork);
                moveTsk.addDependentTask(statTask);
            }
        } catch (HiveException e) {
            throw new SemanticException(e);
        }
    }
    rootTasks.add(truncateTask);
}
Also used : MoveWork(org.apache.hadoop.hive.ql.plan.MoveWork) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) LinkedHashMap(java.util.LinkedHashMap) HashMap(java.util.HashMap) FieldSchema(org.apache.hadoop.hive.metastore.api.FieldSchema) StatsWork(org.apache.hadoop.hive.ql.plan.StatsWork) BasicStatsWork(org.apache.hadoop.hive.ql.plan.BasicStatsWork) ListBucketingCtx(org.apache.hadoop.hive.ql.plan.ListBucketingCtx) WriteEntity(org.apache.hadoop.hive.ql.hooks.WriteEntity) HashSet(java.util.HashSet) Path(org.apache.hadoop.fs.Path) Partition(org.apache.hadoop.hive.ql.metadata.Partition) AlterTableExchangePartition(org.apache.hadoop.hive.ql.plan.AlterTableExchangePartition) Table(org.apache.hadoop.hive.ql.metadata.Table) TruncateTableDesc(org.apache.hadoop.hive.ql.plan.TruncateTableDesc) SQLUniqueConstraint(org.apache.hadoop.hive.metastore.api.SQLUniqueConstraint) NotNullConstraint(org.apache.hadoop.hive.ql.metadata.NotNullConstraint) DefaultConstraint(org.apache.hadoop.hive.ql.metadata.DefaultConstraint) SQLCheckConstraint(org.apache.hadoop.hive.metastore.api.SQLCheckConstraint) SQLNotNullConstraint(org.apache.hadoop.hive.metastore.api.SQLNotNullConstraint) SQLDefaultConstraint(org.apache.hadoop.hive.metastore.api.SQLDefaultConstraint) LoadTableDesc(org.apache.hadoop.hive.ql.plan.LoadTableDesc) DDLWork(org.apache.hadoop.hive.ql.plan.DDLWork) RCFileInputFormat(org.apache.hadoop.hive.ql.io.RCFileInputFormat) DescTableDesc(org.apache.hadoop.hive.ql.plan.DescTableDesc) AlterTableDesc(org.apache.hadoop.hive.ql.plan.AlterTableDesc) UnlockTableDesc(org.apache.hadoop.hive.ql.plan.UnlockTableDesc) DropTableDesc(org.apache.hadoop.hive.ql.plan.DropTableDesc) ShowCreateTableDesc(org.apache.hadoop.hive.ql.plan.ShowCreateTableDesc) TableDesc(org.apache.hadoop.hive.ql.plan.TableDesc) LockTableDesc(org.apache.hadoop.hive.ql.plan.LockTableDesc)
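
The heart of the truncate-column validation above is the loop that maps the requested column names to indexes and rejects bucketing and list-bucketing columns. A condensed, standalone version of that logic is sketched below; plain IllegalArgumentException is used as a stand-in for the SemanticException/ErrorMsg pairs, and the caller is assumed to have already resolved cols and bucketCols from the table or partition.

import java.util.HashSet;
import java.util.List;
import java.util.Set;
import org.apache.hadoop.hive.metastore.api.FieldSchema;

class TruncateColumnCheckSketch {
    // Resolve requested column names to their positions in the schema,
    // failing fast on unknown columns and on columns used for bucketing.
    static Set<Integer> resolveColumnIndexes(List<String> columnNames, List<FieldSchema> cols,
                                             List<String> bucketCols) {
        Set<Integer> columnIndexes = new HashSet<>();
        for (String columnName : columnNames) {
            int found = -1;
            for (int i = 0; i < cols.size(); i++) {
                if (columnName.equalsIgnoreCase(cols.get(i).getName())) {
                    found = i;
                    break;
                }
            }
            if (found < 0) {
                throw new IllegalArgumentException("Unknown column: " + columnName);
            }
            for (String bucketCol : bucketCols) {
                if (bucketCol.equalsIgnoreCase(columnName)) {
                    throw new IllegalArgumentException("Cannot truncate bucketing column: " + columnName);
                }
            }
            columnIndexes.add(found);
        }
        return columnIndexes;
    }
}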

Example 95 with Partition

Use of org.apache.hadoop.hive.ql.metadata.Partition in project hive by apache.

From the class DDLSemanticAnalyzer, method analyzeExchangePartition:

private void analyzeExchangePartition(String[] qualified, ASTNode ast) throws SemanticException {
    Table destTable = getTable(qualified);
    Table sourceTable = getTable(getUnescapedName((ASTNode) ast.getChild(1)));
    // Get the partition specs
    Map<String, String> partSpecs = getValidatedPartSpec(sourceTable, (ASTNode) ast.getChild(0), conf, false);
    validatePartitionValues(partSpecs);
    boolean sameColumns = MetaStoreUtils.compareFieldColumns(destTable.getAllCols(), sourceTable.getAllCols());
    boolean samePartitions = MetaStoreUtils.compareFieldColumns(destTable.getPartitionKeys(), sourceTable.getPartitionKeys());
    if (!sameColumns || !samePartitions) {
        throw new SemanticException(ErrorMsg.TABLES_INCOMPATIBLE_SCHEMAS.getMsg());
    }
    // files with write IDs may not be valid. It may affect snapshot isolation for on-going txns as well.
    if (AcidUtils.isTransactionalTable(sourceTable) || AcidUtils.isTransactionalTable(destTable)) {
        throw new SemanticException(ErrorMsg.EXCHANGE_PARTITION_NOT_ALLOWED_WITH_TRANSACTIONAL_TABLES.getMsg());
    }
    // check if source partition exists
    getPartitions(sourceTable, partSpecs, true);
    // Verify that the partitions specified are continuous
    // If a subpartition value is specified without specifying a partition's value
    // then we throw an exception
    int counter = isPartitionValueContinuous(sourceTable.getPartitionKeys(), partSpecs);
    if (counter < 0) {
        throw new SemanticException(ErrorMsg.PARTITION_VALUE_NOT_CONTINUOUS.getMsg(partSpecs.toString()));
    }
    List<Partition> destPartitions = null;
    try {
        destPartitions = getPartitions(destTable, partSpecs, true);
    } catch (SemanticException ex) {
    // We should expect a semantic exception being throw as this partition
    // should not be present.
    }
    if (destPartitions != null) {
        // If any destination partition is present then throw a Semantic Exception.
        throw new SemanticException(ErrorMsg.PARTITION_EXISTS.getMsg(destPartitions.toString()));
    }
    AlterTableExchangePartition alterTableExchangePartition = new AlterTableExchangePartition(sourceTable, destTable, partSpecs);
    rootTasks.add(TaskFactory.get(new DDLWork(getInputs(), getOutputs(), alterTableExchangePartition)));
    inputs.add(new ReadEntity(sourceTable));
    outputs.add(new WriteEntity(destTable, WriteType.DDL_SHARED));
}
Also used : Partition(org.apache.hadoop.hive.ql.metadata.Partition) AlterTableExchangePartition(org.apache.hadoop.hive.ql.plan.AlterTableExchangePartition) Table(org.apache.hadoop.hive.ql.metadata.Table) SQLUniqueConstraint(org.apache.hadoop.hive.metastore.api.SQLUniqueConstraint) NotNullConstraint(org.apache.hadoop.hive.ql.metadata.NotNullConstraint) DefaultConstraint(org.apache.hadoop.hive.ql.metadata.DefaultConstraint) SQLCheckConstraint(org.apache.hadoop.hive.metastore.api.SQLCheckConstraint) SQLNotNullConstraint(org.apache.hadoop.hive.metastore.api.SQLNotNullConstraint) SQLDefaultConstraint(org.apache.hadoop.hive.metastore.api.SQLDefaultConstraint) ReadEntity(org.apache.hadoop.hive.ql.hooks.ReadEntity) DDLWork(org.apache.hadoop.hive.ql.plan.DDLWork) AlterTableExchangePartition(org.apache.hadoop.hive.ql.plan.AlterTableExchangePartition) WriteEntity(org.apache.hadoop.hive.ql.hooks.WriteEntity)
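
The continuity rule checked by isPartitionValueContinuous(...) is that the supplied spec must cover a gap-free prefix of the table's partition keys. The helper below is an illustrative reconstruction of that rule, not the analyzer's private implementation; it returns the number of leading keys covered, or -1 when a later key is specified while an earlier one is missing (matching the counter < 0 test above).

import java.util.List;
import java.util.Map;
import org.apache.hadoop.hive.metastore.api.FieldSchema;

class PartitionSpecContinuitySketch {
    // Illustrative reconstruction of the prefix/continuity rule behind isPartitionValueContinuous(...).
    static int leadingKeysCovered(List<FieldSchema> partitionKeys, Map<String, String> partSpec) {
        int covered = 0;
        boolean gapSeen = false;
        for (FieldSchema key : partitionKeys) {
            boolean present = partSpec.containsKey(key.getName());
            if (present && gapSeen) {
                return -1;  // a later key is set while an earlier key is missing: not continuous
            }
            if (present) {
                covered++;
            } else {
                gapSeen = true;
            }
        }
        return covered;
    }
}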

Aggregations

Classes used together with Partition in the examples indexed here, with usage counts:

Partition (org.apache.hadoop.hive.ql.metadata.Partition): 102
Table (org.apache.hadoop.hive.ql.metadata.Table): 56
HiveException (org.apache.hadoop.hive.ql.metadata.HiveException): 48
ArrayList (java.util.ArrayList): 43
Path (org.apache.hadoop.fs.Path): 25
AlterTableExchangePartition (org.apache.hadoop.hive.ql.plan.AlterTableExchangePartition): 25
WriteEntity (org.apache.hadoop.hive.ql.hooks.WriteEntity): 24
IOException (java.io.IOException): 18
HashMap (java.util.HashMap): 18
LinkedHashMap (java.util.LinkedHashMap): 18
ReadEntity (org.apache.hadoop.hive.ql.hooks.ReadEntity): 18
SemanticException (org.apache.hadoop.hive.ql.parse.SemanticException): 18
PrunedPartitionList (org.apache.hadoop.hive.ql.parse.PrunedPartitionList): 17
MetaException (org.apache.hadoop.hive.metastore.api.MetaException): 14
FileNotFoundException (java.io.FileNotFoundException): 12
FieldSchema (org.apache.hadoop.hive.metastore.api.FieldSchema): 12
TableScanOperator (org.apache.hadoop.hive.ql.exec.TableScanOperator): 12
InvalidOperationException (org.apache.hadoop.hive.metastore.api.InvalidOperationException): 11
SQLCheckConstraint (org.apache.hadoop.hive.metastore.api.SQLCheckConstraint): 11
SQLDefaultConstraint (org.apache.hadoop.hive.metastore.api.SQLDefaultConstraint): 11