Search in sources :

Example 96 with Partition

use of org.apache.hadoop.hive.ql.metadata.Partition in project hive by apache.

the class DDLSemanticAnalyzer method addTablePartsOutputs.

/**
 * Registers every partition matched by the given partition specs as a write output, so the
 * outputs are visible to the pre-execution hook.
 *
 * @param table the table whose partitions are looked up
 * @param partSpecs the partition specs to resolve, in the order they appear under {@code ast}
 * @param throwIfNonExistent when true, a spec that matches no partition raises an error;
 *        when false such a spec is silently skipped
 * @param allowMany when true each spec is resolved with {@code db.getPartitions} and may match
 *        several partitions; when false it is resolved with {@code db.getPartition} and
 *        contributes at most one partition
 * @param ast the node whose children (starting at child 1) correspond to the specs; used only
 *        to build the error message for a spec that matched nothing
 * @param writeType the write type recorded on each resulting {@link WriteEntity}
 * @throws SemanticException if a lookup fails, or if a spec matches nothing and
 *         {@code throwIfNonExistent} is set
 */
private void addTablePartsOutputs(Table table, List<Map<String, String>> partSpecs, boolean throwIfNonExistent, boolean allowMany, ASTNode ast, WriteEntity.WriteType writeType) throws SemanticException {
    // Position of the current spec among the children of ast; the first spec is child 1.
    int childPosition = 0;
    for (Map<String, String> spec : partSpecs) {
        childPosition++;

        List<Partition> matched;
        if (!allowMany) {
            // Single-partition lookup: a null result simply leaves the list empty.
            Partition single;
            try {
                single = db.getPartition(table, spec, false);
            } catch (HiveException e) {
                LOG.debug("Wrong specification" + StringUtils.stringifyException(e));
                throw new SemanticException(e.getMessage(), e);
            }
            matched = new ArrayList<Partition>();
            if (single != null) {
                matched.add(single);
            }
        } else {
            try {
                matched = db.getPartitions(table, spec);
            } catch (HiveException e) {
                LOG.error("Got HiveException during obtaining list of partitions" + StringUtils.stringifyException(e));
                throw new SemanticException(e.getMessage(), e);
            }
        }

        if (matched.isEmpty() && throwIfNonExistent) {
            throw new SemanticException(ErrorMsg.INVALID_PARTITION.getMsg(ast.getChild(childPosition)));
        }

        // Don't request any locks here, as the table has already been locked.
        for (Partition partition : matched) {
            outputs.add(new WriteEntity(partition, writeType));
        }
    }
}
Also used : Partition(org.apache.hadoop.hive.ql.metadata.Partition) AlterTableExchangePartition(org.apache.hadoop.hive.ql.plan.AlterTableExchangePartition) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) Map(java.util.Map) LinkedHashMap(java.util.LinkedHashMap) HashMap(java.util.HashMap) WriteEntity(org.apache.hadoop.hive.ql.hooks.WriteEntity) SQLUniqueConstraint(org.apache.hadoop.hive.metastore.api.SQLUniqueConstraint) NotNullConstraint(org.apache.hadoop.hive.ql.metadata.NotNullConstraint) DefaultConstraint(org.apache.hadoop.hive.ql.metadata.DefaultConstraint) SQLCheckConstraint(org.apache.hadoop.hive.metastore.api.SQLCheckConstraint) SQLNotNullConstraint(org.apache.hadoop.hive.metastore.api.SQLNotNullConstraint) SQLDefaultConstraint(org.apache.hadoop.hive.metastore.api.SQLDefaultConstraint)

Example 97 with Partition

use of org.apache.hadoop.hive.ql.metadata.Partition in project hive by apache.

the class DDLSemanticAnalyzer method addTableDropPartsOutputs.

/**
 * Registers the partitions selected by each partition expression as {@code DDL_EXCLUSIVE}
 * write outputs, so the outputs are visible to the pre-execution hook.
 *
 * @param tab the table whose partitions are looked up
 * @param partSpecs groups of partition expressions; every expression in every group is
 *        resolved independently through {@code db.getPartitionsByExpr}
 * @param throwIfNonExistent when true, an expression that selects no partition raises an
 *        error; when false such an expression is silently skipped
 * @throws SemanticException if the lookup fails, if the lookup reports unknown partitions, or
 *         if an expression selects nothing and {@code throwIfNonExistent} is set
 */
private void addTableDropPartsOutputs(Table tab, Collection<List<ExprNodeGenericFuncDesc>> partSpecs, boolean throwIfNonExistent) throws SemanticException {
    for (List<ExprNodeGenericFuncDesc> exprGroup : partSpecs) {
        for (ExprNodeGenericFuncDesc expr : exprGroup) {
            // Filled in by getPartitionsByExpr with the partitions the expression selects.
            List<Partition> selected = new ArrayList<Partition>();
            boolean unknownReported;
            try {
                unknownReported = db.getPartitionsByExpr(tab, expr, conf, selected);
            } catch (Exception e) {
                throw new SemanticException(ErrorMsg.INVALID_PARTITION.getMsg(expr.getExprString()), e);
            }
            if (unknownReported) {
                throw new SemanticException("Unexpected unknown partitions for " + expr.getExprString());
            }
            // An empty selection is an error only when the caller asked for one; otherwise
            // the expression contributes no outputs.
            if (selected.isEmpty() && throwIfNonExistent) {
                throw new SemanticException(ErrorMsg.INVALID_PARTITION.getMsg(expr.getExprString()));
            }
            for (Partition partition : selected) {
                outputs.add(new WriteEntity(partition, WriteEntity.WriteType.DDL_EXCLUSIVE));
            }
        }
    }
}
Also used : Partition(org.apache.hadoop.hive.ql.metadata.Partition) AlterTableExchangePartition(org.apache.hadoop.hive.ql.plan.AlterTableExchangePartition) ArrayList(java.util.ArrayList) ExprNodeGenericFuncDesc(org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc) WriteEntity(org.apache.hadoop.hive.ql.hooks.WriteEntity) LockException(org.apache.hadoop.hive.ql.lockmgr.LockException) InvocationTargetException(java.lang.reflect.InvocationTargetException) MetaException(org.apache.hadoop.hive.metastore.api.MetaException) URISyntaxException(java.net.URISyntaxException) FileNotFoundException(java.io.FileNotFoundException) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) InvalidTableException(org.apache.hadoop.hive.ql.metadata.InvalidTableException)

Example 98 with Partition

use of org.apache.hadoop.hive.ql.metadata.Partition in project hive by apache.

the class LoadSemanticAnalyzer method analyzeInternal.

/**
 * Analyzes a load statement: validates the source path and the destination table, records
 * the read/write entities, and registers a move task (plus a dependent stats task when
 * {@code HIVESTATSAUTOGATHER} is enabled) as a root task.
 *
 * <p>Child 0 of {@code ast} is the source path and child 1 the destination table. With four
 * children both the local and the overwrite flag are set; with three children the third
 * child selects one of them (its text is {@code local}, case-insensitively, for the local
 * flag, anything else for overwrite).
 *
 * @param ast the root node of the load statement
 * @throws SemanticException if the source path is invalid, if the destination is a view, a
 *         materialized view, a non-native table or a table stored as sub-directories, if a
 *         partitioned table is targeted without a partition spec, if the bucketing check
 *         rejects the load, if the partition lookup fails, or if no write id can be allocated
 */
@Override
public void analyzeInternal(ASTNode ast) throws SemanticException {
    Tree fromTree = ast.getChild(0);
    Tree tableTree = ast.getChild(1);
    boolean isLocal = false;
    boolean isOverWrite = false;
    int childCount = ast.getChildCount();
    if (childCount == 4) {
        isLocal = true;
        isOverWrite = true;
    } else if (childCount == 3) {
        // Only one optional flag is present; its text tells which one.
        if (ast.getChild(2).getText().equalsIgnoreCase("local")) {
            isLocal = true;
        } else {
            isOverWrite = true;
        }
    }
    // initialize load path
    URI fromURI;
    try {
        String fromPath = stripQuotes(fromTree.getText());
        fromURI = initializeFromURI(fromPath, isLocal);
    } catch (IOException e) {
        throw new SemanticException(ErrorMsg.INVALID_PATH.getMsg(fromTree, e.getMessage()), e);
    } catch (URISyntaxException e) {
        throw new SemanticException(ErrorMsg.INVALID_PATH.getMsg(fromTree, e.getMessage()), e);
    }
    // initialize destination table/partition
    TableSpec ts = new TableSpec(db, conf, (ASTNode) tableTree);
    if (ts.tableHandle.isView() || ts.tableHandle.isMaterializedView()) {
        throw new SemanticException(ErrorMsg.DML_AGAINST_VIEW.getMsg());
    }
    if (ts.tableHandle.isNonNative()) {
        throw new SemanticException(ErrorMsg.LOAD_INTO_NON_NATIVE.getMsg());
    }
    if (ts.tableHandle.isStoredAsSubDirectories()) {
        throw new SemanticException(ErrorMsg.LOAD_INTO_STORED_AS_DIR.getMsg());
    }
    // A table with partition keys can only be loaded through an explicit partition spec.
    List<FieldSchema> parts = ts.tableHandle.getPartitionKeys();
    if ((parts != null && parts.size() > 0) && (ts.partSpec == null || ts.partSpec.size() == 0)) {
        throw new SemanticException(ErrorMsg.NEED_PARTITION_ERROR.getMsg());
    }
    List<String> bucketCols = ts.tableHandle.getBucketCols();
    if (bucketCols != null && !bucketCols.isEmpty()) {
        String error = StrictChecks.checkBucketing(conf);
        if (error != null) {
            throw new SemanticException("Please load into an intermediate table" + " and use 'insert... select' to allow Hive to enforce bucketing. " + error);
        }
    }
    // make sure the arguments make sense
    List<FileStatus> files = applyConstraintsAndGetFiles(fromURI, fromTree, isLocal, ts.tableHandle);
    // for managed tables, make sure the file formats match
    if (TableType.MANAGED_TABLE.equals(ts.tableHandle.getTableType()) && conf.getBoolVar(HiveConf.ConfVars.HIVECHECKFILEFORMAT)) {
        ensureFileFormatsMatch(ts, files, fromURI);
    }
    inputs.add(toReadEntity(new Path(fromURI)));
    // create final load/move work
    // The write type depends only on the overwrite flag, whichever entity ends up as output.
    WriteEntity.WriteType writeType = isOverWrite ? WriteEntity.WriteType.INSERT_OVERWRITE : WriteEntity.WriteType.INSERT;
    boolean preservePartitionSpecs = false;
    Map<String, String> partSpec = ts.getPartSpec();
    if (partSpec == null) {
        partSpec = new LinkedHashMap<String, String>();
        outputs.add(new WriteEntity(ts.tableHandle, writeType));
    } else {
        try {
            Partition part = Hive.get().getPartition(ts.tableHandle, partSpec, false);
            if (part != null) {
                outputs.add(new WriteEntity(part, writeType));
                // If partition already exists and we aren't overwriting it, then respect
                // its current location info rather than picking it from the parent TableDesc
                preservePartitionSpecs = !isOverWrite;
            } else {
                // The partition does not exist yet, so the table itself is the output.
                outputs.add(new WriteEntity(ts.tableHandle, writeType));
            }
        } catch (HiveException e) {
            throw new SemanticException(e);
        }
    }
    // Write id and statement id are only allocated for transactional tables.
    Long writeId = null;
    int stmtId = -1;
    if (AcidUtils.isTransactionalTable(ts.tableHandle)) {
        try {
            writeId = SessionState.get().getTxnMgr().getTableWriteId(ts.tableHandle.getDbName(), ts.tableHandle.getTableName());
        } catch (LockException ex) {
            throw new SemanticException("Failed to allocate the write id", ex);
        }
        stmtId = SessionState.get().getTxnMgr().getStmtIdAndIncrement();
    }
    // Note: this sets LoadFileType incorrectly for ACID; is that relevant for load?
    // See setLoadFileType and setIsAcidIow calls elsewhere for an example.
    LoadTableDesc loadTableWork = new LoadTableDesc(new Path(fromURI), Utilities.getTableDesc(ts.tableHandle), partSpec, isOverWrite ? LoadFileType.REPLACE_ALL : LoadFileType.KEEP_EXISTING, writeId);
    loadTableWork.setStmtId(stmtId);
    if (preservePartitionSpecs) {
        // Note : preservePartitionSpecs=true implies inheritTableSpecs=false but
        // but preservePartitionSpecs=false(default) here is not sufficient enough
        // info to set inheritTableSpecs=true
        loadTableWork.setInheritTableSpecs(false);
    }
    // The move task is the only root task produced by this analyzer.
    Task<? extends Serializable> childTask = TaskFactory.get(new MoveWork(getInputs(), getOutputs(), loadTableWork, null, true, isLocal));
    rootTasks.add(childTask);
    // The user asked for stats to be collected.
    // Some stats like number of rows require a scan of the data
    // However, some other stats, like number of files, do not require a complete scan
    // Update the stats which do not require a complete scan.
    if (conf.getBoolVar(HiveConf.ConfVars.HIVESTATSAUTOGATHER)) {
        BasicStatsWork basicStatsWork = new BasicStatsWork(loadTableWork);
        basicStatsWork.setNoStatsAggregator(true);
        basicStatsWork.setClearAggregatorStats(true);
        StatsWork columnStatsWork = new StatsWork(ts.tableHandle, basicStatsWork, conf);
        Task<? extends Serializable> statTask = TaskFactory.get(columnStatsWork);
        if (statTask != null) {
            childTask.addDependentTask(statTask);
        }
    }
}
Also used : MoveWork(org.apache.hadoop.hive.ql.plan.MoveWork) FileStatus(org.apache.hadoop.fs.FileStatus) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) FieldSchema(org.apache.hadoop.hive.metastore.api.FieldSchema) URISyntaxException(java.net.URISyntaxException) URI(java.net.URI) StatsWork(org.apache.hadoop.hive.ql.plan.StatsWork) BasicStatsWork(org.apache.hadoop.hive.ql.plan.BasicStatsWork) LockException(org.apache.hadoop.hive.ql.lockmgr.LockException) Tree(org.antlr.runtime.tree.Tree) BasicStatsWork(org.apache.hadoop.hive.ql.plan.BasicStatsWork) WriteEntity(org.apache.hadoop.hive.ql.hooks.WriteEntity) Path(org.apache.hadoop.fs.Path) Partition(org.apache.hadoop.hive.ql.metadata.Partition) IOException(java.io.IOException) LoadTableDesc(org.apache.hadoop.hive.ql.plan.LoadTableDesc)

Example 99 with Partition

use of org.apache.hadoop.hive.ql.metadata.Partition in project hive by apache.

the class ProcessAnalyzeTable method process.

/**
 * Plans the statistics work for a table scan that belongs to an analyze command.
 *
 * <p>For ORC and Parquet input formats a footer-scan {@link StatsWork} task replaces the
 * current task among the root tasks. For every other input format a stats task is made
 * dependent on the current task (and replaces it among the root tasks for the noscan
 * variant), and a map work that gathers stats is created for the scan.
 *
 * @param nd the node being processed; cast to {@link TableScanOperator}
 * @param stack not read by this method
 * @param procContext the processing context; cast to {@link GenTezProcContext}
 * @param nodeOutputs not read by this method
 * @return {@code true} when the query is an analyze command, {@code null} otherwise
 * @throws SemanticException declared by the processor contract
 */
@SuppressWarnings("unchecked")
@Override
public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procContext, Object... nodeOutputs) throws SemanticException {
    GenTezProcContext context = (GenTezProcContext) procContext;
    TableScanOperator tableScan = (TableScanOperator) nd;
    ParseContext parseContext = context.parseContext;
    Table table = tableScan.getConf().getTableMetadata();
    Class<? extends InputFormat> inputFormat = table.getInputFormatClass();

    // Nothing to plan unless this is an analyze command.
    if (!parseContext.getQueryProperties().isAnalyzeCommand()) {
        return null;
    }

    assert tableScan.getChildOperators() == null || tableScan.getChildOperators().isEmpty();

    // Find the alias under which this scan is registered as a top operator.
    String scanAlias = null;
    for (String candidate : parseContext.getTopOps().keySet()) {
        if (parseContext.getTopOps().get(candidate) == tableScan) {
            scanAlias = candidate;
        }
    }
    assert scanAlias != null;

    TezWork tezWork = context.currentTask.getWork();
    boolean footerScanFormat = OrcInputFormat.class.isAssignableFrom(inputFormat) || MapredParquetInputFormat.class.isAssignableFrom(inputFormat);

    if (footerScanFormat) {
        // For ORC & Parquet, all the following statements are the same
        // ANALYZE TABLE T [PARTITION (...)] COMPUTE STATISTICS
        // ANALYZE TABLE T [PARTITION (...)] COMPUTE STATISTICS noscan;
        // There will not be any Tez job above this task
        StatsWork footerStatsWork = new StatsWork(table, parseContext.getConf());
        footerStatsWork.setFooterScan();
        // If partition is specified, get pruned partition list
        Set<Partition> confirmed = GenMapRedUtils.getConfirmedPartitionsForScan(tableScan);
        if (!confirmed.isEmpty()) {
            List<String> partitionColumns = GenMapRedUtils.getPartitionColumns(tableScan);
            PrunedPartitionList pruned = new PrunedPartitionList(table, confirmed, partitionColumns, false);
            footerStatsWork.addInputPartitions(pruned.getPartitions());
        }
        // The stats task takes the place of the current task as a root task.
        Task<StatsWork> footerStatsTask = TaskFactory.get(footerStatsWork);
        footerStatsTask.setParentTasks(null);
        context.rootTasks.remove(context.currentTask);
        context.rootTasks.add(footerStatsTask);
        return true;
    }

    // ANALYZE TABLE T [PARTITION (...)] COMPUTE STATISTICS;
    // The plan consists of a simple TezTask followed by a StatsTask.
    // The Tez task is just a simple TableScanOperator
    BasicStatsWork basicWork = new BasicStatsWork(table.getTableSpec());
    basicWork.setNoScanAnalyzeCommand(parseContext.getQueryProperties().isNoScanAnalyzeCommand());
    StatsWork aggregatedStatsWork = new StatsWork(table, basicWork, parseContext.getConf());
    aggregatedStatsWork.collectStatsFromAggregator(tableScan.getConf());
    aggregatedStatsWork.setSourceTask(context.currentTask);
    Task<StatsWork> aggregatedStatsTask = TaskFactory.get(aggregatedStatsWork);
    context.currentTask.addDependentTask(aggregatedStatsTask);

    // For the noscan variant the stats task replaces the current task as a root task.
    if (parseContext.getQueryProperties().isNoScanAnalyzeCommand()) {
        aggregatedStatsTask.setParentTasks(null);
        context.rootTasks.remove(context.currentTask);
        context.rootTasks.add(aggregatedStatsTask);
    }

    // NOTE: here we should use the new partition predicate pushdown API to
    // get a list of pruned list,
    // and pass it to setTaskPlan as the last parameter
    Set<Partition> confirmed = GenMapRedUtils.getConfirmedPartitionsForScan(tableScan);
    PrunedPartitionList pruned = null;
    if (!confirmed.isEmpty()) {
        List<String> partitionColumns = GenMapRedUtils.getPartitionColumns(tableScan);
        pruned = new PrunedPartitionList(table, confirmed, partitionColumns, false);
    }
    MapWork scanWork = utils.createMapWork(context, tableScan, tezWork, pruned);
    scanWork.setGatheringStats(true);
    return true;
}
Also used : Partition(org.apache.hadoop.hive.ql.metadata.Partition) TableScanOperator(org.apache.hadoop.hive.ql.exec.TableScanOperator) Table(org.apache.hadoop.hive.ql.metadata.Table) MapredParquetInputFormat(org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat) StatsWork(org.apache.hadoop.hive.ql.plan.StatsWork) BasicStatsWork(org.apache.hadoop.hive.ql.plan.BasicStatsWork) MapWork(org.apache.hadoop.hive.ql.plan.MapWork) OrcInputFormat(org.apache.hadoop.hive.ql.io.orc.OrcInputFormat) BasicStatsWork(org.apache.hadoop.hive.ql.plan.BasicStatsWork) TezWork(org.apache.hadoop.hive.ql.plan.TezWork)

Example 100 with Partition

use of org.apache.hadoop.hive.ql.metadata.Partition in project hive by apache.

the class TableSerializer method writePartitions.

/**
 * Writes the partitions as a JSON array, one serialized entry per partition. When
 * {@code partitions} is null an empty array is written.
 *
 * @param writer the JSON writer whose generator receives the array
 * @param additionalPropertiesProvider passed through to each {@link PartitionSerializer}
 * @throws SemanticException if serializing a partition fails
 * @throws IOException if the underlying generator fails to write
 */
private void writePartitions(JsonWriter writer, ReplicationSpec additionalPropertiesProvider) throws SemanticException, IOException {
    writer.jsonGenerator.writeStartArray();
    if (partitions == null) {
        // Nothing to serialize: close the array straight away.
        writer.jsonGenerator.writeEndArray();
        return;
    }
    for (org.apache.hadoop.hive.ql.metadata.Partition current : partitions) {
        PartitionSerializer serializer = new PartitionSerializer(current.getTPartition());
        serializer.writeTo(writer, additionalPropertiesProvider);
    }
    writer.jsonGenerator.writeEndArray();
}
Also used : Partition(org.apache.hadoop.hive.ql.metadata.Partition)

Aggregations

Partition (org.apache.hadoop.hive.ql.metadata.Partition)102 Table (org.apache.hadoop.hive.ql.metadata.Table)56 HiveException (org.apache.hadoop.hive.ql.metadata.HiveException)48 ArrayList (java.util.ArrayList)43 Path (org.apache.hadoop.fs.Path)25 AlterTableExchangePartition (org.apache.hadoop.hive.ql.plan.AlterTableExchangePartition)25 WriteEntity (org.apache.hadoop.hive.ql.hooks.WriteEntity)24 IOException (java.io.IOException)18 HashMap (java.util.HashMap)18 LinkedHashMap (java.util.LinkedHashMap)18 ReadEntity (org.apache.hadoop.hive.ql.hooks.ReadEntity)18 SemanticException (org.apache.hadoop.hive.ql.parse.SemanticException)18 PrunedPartitionList (org.apache.hadoop.hive.ql.parse.PrunedPartitionList)17 MetaException (org.apache.hadoop.hive.metastore.api.MetaException)14 FileNotFoundException (java.io.FileNotFoundException)12 FieldSchema (org.apache.hadoop.hive.metastore.api.FieldSchema)12 TableScanOperator (org.apache.hadoop.hive.ql.exec.TableScanOperator)12 InvalidOperationException (org.apache.hadoop.hive.metastore.api.InvalidOperationException)11 SQLCheckConstraint (org.apache.hadoop.hive.metastore.api.SQLCheckConstraint)11 SQLDefaultConstraint (org.apache.hadoop.hive.metastore.api.SQLDefaultConstraint)11