Example 6 with PrunedPartitionList

use of org.apache.hadoop.hive.ql.parse.PrunedPartitionList in project hive by apache.

the class HiveMetaStoreChecker method checkTable.

/**
   * Check the metastore for inconsistencies, data missing in either the
   * metastore or on the dfs.
   *
   * @param dbName
   *          Name of the database
   * @param tableName
   *          Name of the table
   * @param partitions
   *          Partitions to check, if null or empty get all the partitions.
   * @param result
   *          Result object
   * @throws HiveException
   *           Failed to get required information from the metastore.
   * @throws IOException
   *           Most likely filesystem related
   * @throws MetaException
   *           Failed to get required information from the metastore.
   */
void checkTable(String dbName, String tableName, List<? extends Map<String, String>> partitions, CheckResult result) throws MetaException, IOException, HiveException {
    Table table = null;
    try {
        table = hive.getTable(dbName, tableName);
    } catch (HiveException e) {
        result.getTablesNotInMs().add(tableName);
        return;
    }
    List<Partition> parts = new ArrayList<Partition>();
    boolean findUnknownPartitions = true;
    if (table.isPartitioned()) {
        if (partitions == null || partitions.isEmpty()) {
            // no partitions specified, let's get all of them
            PrunedPartitionList prunedPartList = PartitionPruner.prune(table, null, conf, toString(), null);
            parts.addAll(prunedPartList.getPartitions());
        } else {
            // we're interested in specific partitions,
            // don't check for any others
            findUnknownPartitions = false;
            for (Map<String, String> map : partitions) {
                Partition part = hive.getPartition(table, map, false);
                if (part == null) {
                    PartitionResult pr = new PartitionResult();
                    pr.setTableName(tableName);
                    pr.setPartitionName(Warehouse.makePartPath(map));
                    result.getPartitionsNotInMs().add(pr);
                } else {
                    parts.add(part);
                }
            }
        }
    }
    checkTable(table, parts, findUnknownPartitions, result);
}
Also used : PrunedPartitionList(org.apache.hadoop.hive.ql.parse.PrunedPartitionList) ArrayList(java.util.ArrayList) PartitionResult(org.apache.hadoop.hive.ql.metadata.CheckResult.PartitionResult)
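
checkTable is package-private; external callers (for example the MSCK REPAIR TABLE path) reach it through checkMetastore. A minimal driving sketch, assuming the HiveMetaStoreChecker(Hive) constructor and the CheckResult accessors behave as in this Hive version; the database and table names are hypothetical:

Hive hive = Hive.get();
CheckResult result = new CheckResult();
// passing null for the partition list makes checkTable take the
// PartitionPruner.prune(...) branch above and inspect every partition
new HiveMetaStoreChecker(hive).checkMetastore("default", "web_logs", null, result);
// partitions present on the filesystem but absent from the metastore, and vice versa
System.out.println("not in metastore: " + result.getPartitionsNotInMs());
System.out.println("not on filesystem: " + result.getPartitionsNotOnFs());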

Example 7 with PrunedPartitionList

use of org.apache.hadoop.hive.ql.parse.PrunedPartitionList in project hive by apache.

the class PartitionPruner method prune.

/**
   * Get the partition list for the table that satisfies the partition pruner
   * condition.
   *
   * @param tab
   *          the table object for the alias
   * @param prunerExpr
   *          the pruner expression for the alias
   * @param conf
   *          for checking whether "strict" mode is on.
   * @param alias
   *          for generating error message only.
   * @param prunedPartitionsMap
   *          cached result for the table
   * @return the partition list for the table that satisfies the partition
   *         pruner condition.
   * @throws SemanticException
   */
public static PrunedPartitionList prune(Table tab, ExprNodeDesc prunerExpr, HiveConf conf, String alias, Map<String, PrunedPartitionList> prunedPartitionsMap) throws SemanticException {
    if (LOG.isTraceEnabled()) {
        LOG.trace("Started pruning partition");
        LOG.trace("dbname = " + tab.getDbName());
        LOG.trace("tabname = " + tab.getTableName());
        LOG.trace("prune Expression = " + (prunerExpr == null ? "" : prunerExpr));
    }
    String key = tab.getDbName() + "." + tab.getTableName() + ";";
    if (!tab.isPartitioned()) {
        // A non-partitioned table yields a single pseudo-partition wrapping the table itself.
        return getAllPartsFromCacheOrServer(tab, key, false, prunedPartitionsMap);
    }
    if (!hasColumnExpr(prunerExpr)) {
        // If "strict" mode is on, a partition predicate must be supplied for every partitioned table.
        String error = StrictChecks.checkNoPartitionFilter(conf);
        if (error != null) {
            throw new SemanticException(error + " No partition predicate for Alias \"" + alias + "\" Table \"" + tab.getTableName() + "\"");
        }
    }
    if (prunerExpr == null) {
        // Non-strict mode with no predicate at all: get everything.
        return getAllPartsFromCacheOrServer(tab, key, false, prunedPartitionsMap);
    }
    Set<String> partColsUsedInFilter = new LinkedHashSet<String>();
    // Replace virtual columns with nulls. See javadoc for details.
    prunerExpr = removeNonPartCols(prunerExpr, extractPartColNames(tab), partColsUsedInFilter);
    // Remove all parts that are not partition columns. See javadoc for details.
    ExprNodeDesc compactExpr = compactExpr(prunerExpr.clone());
    String oldFilter = prunerExpr.getExprString();
    if (compactExpr == null || isBooleanExpr(compactExpr)) {
        if (isFalseExpr(compactExpr)) {
            return new PrunedPartitionList(tab, new LinkedHashSet<Partition>(0), new ArrayList<String>(0), false);
        }
        // For null and true values, return every partition
        return getAllPartsFromCacheOrServer(tab, key, true, prunedPartitionsMap);
    }
    if (LOG.isDebugEnabled()) {
        LOG.debug("Filter w/ compacting: " + compactExpr.getExprString() + "; filter w/o compacting: " + oldFilter);
    }
    key = key + compactExpr.getExprString();
    PrunedPartitionList ppList = prunedPartitionsMap.get(key);
    if (ppList != null) {
        return ppList;
    }
    ppList = getPartitionsFromServer(tab, (ExprNodeGenericFuncDesc) compactExpr, conf, alias, partColsUsedInFilter, oldFilter.equals(compactExpr.getExprString()));
    prunedPartitionsMap.put(key, ppList);
    return ppList;
}
Also used : LinkedHashSet(java.util.LinkedHashSet) Partition(org.apache.hadoop.hive.ql.metadata.Partition) PrunedPartitionList(org.apache.hadoop.hive.ql.parse.PrunedPartitionList) ExprNodeGenericFuncDesc(org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) SemanticException(org.apache.hadoop.hive.ql.parse.SemanticException)
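
The pruner expression is an ordinary ExprNodeDesc tree, so a caller can build one with the same ExprNodeGenericFuncDesc.newInstance(...) pattern used in Example 10 below. A minimal sketch, assuming a Table tab partitioned by a string column ds and a HiveConf conf in scope, inside a method that throws SemanticException (column name and value are hypothetical); the cache map must be non-null here, because prune consults it once a real predicate survives compaction:

// build the pruner expression ds = '2017-01-01'
ExprNodeDesc dsCol = new ExprNodeColumnDesc(TypeInfoFactory.stringTypeInfo, "ds", null, true);
ExprNodeDesc dsVal = new ExprNodeConstantDesc("2017-01-01");
ExprNodeDesc eq = ExprNodeGenericFuncDesc.newInstance(
    FunctionRegistry.getFunctionInfo("=").getGenericUDF(), Arrays.asList(dsCol, dsVal));
Map<String, PrunedPartitionList> cache = new HashMap<String, PrunedPartitionList>();
PrunedPartitionList plist = PartitionPruner.prune(tab, eq, conf, "t", cache);
for (Partition p : plist.getPartitions()) {
    System.out.println(p.getCompleteName());
}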

Example 8 with PrunedPartitionList

use of org.apache.hadoop.hive.ql.parse.PrunedPartitionList in project hive by apache.

the class PartitionPruner method getAllPartsFromCacheOrServer.

private static PrunedPartitionList getAllPartsFromCacheOrServer(Table tab, String key, boolean unknownPartitions, Map<String, PrunedPartitionList> partsCache) throws SemanticException {
    PrunedPartitionList ppList = partsCache == null ? null : partsCache.get(key);
    if (ppList != null) {
        return ppList;
    }
    Set<Partition> parts;
    try {
        parts = getAllPartitions(tab);
    } catch (HiveException e) {
        throw new SemanticException(e);
    }
    ppList = new PrunedPartitionList(tab, parts, null, unknownPartitions);
    if (partsCache != null) {
        partsCache.put(key, ppList);
    }
    return ppList;
}
Also used : Partition(org.apache.hadoop.hive.ql.metadata.Partition) PrunedPartitionList(org.apache.hadoop.hive.ql.parse.PrunedPartitionList) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) SemanticException(org.apache.hadoop.hive.ql.parse.SemanticException)
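
Because the helper both reads and populates the shared map, passing the same cache across calls makes repeated lookups free: the second call returns the identical PrunedPartitionList instance. A small sketch, assuming a partitioned Table tab, a HiveConf conf in scope, and strict partition-filter checks disabled:

Map<String, PrunedPartitionList> cache = new HashMap<String, PrunedPartitionList>();
// a null pruner expression routes prune(...) into getAllPartsFromCacheOrServer
PrunedPartitionList first = PartitionPruner.prune(tab, null, conf, "t", cache);
PrunedPartitionList second = PartitionPruner.prune(tab, null, conf, "t", cache);
// the second call was served from the cache under the key "db.tbl;"
assert first == second;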

Example 9 with PrunedPartitionList

use of org.apache.hadoop.hive.ql.parse.PrunedPartitionList in project hive by apache.

the class IndexUtils method checkPartitionsCoveredByIndex.

/**
   * Check the partitions used by the table scan to make sure they also exist in the
   * index table.
   *
   * @param tableScan the table scan whose partitions are being checked
   * @param pctx the parse context holding the pruned partition lists
   * @param indexes the candidate indexes that must cover the scanned partitions
   * @return the partitions used by the query, or null if any of them is missing
   *         from the index table
   * @throws HiveException
   */
public static Set<Partition> checkPartitionsCoveredByIndex(TableScanOperator tableScan, ParseContext pctx, List<Index> indexes) throws HiveException {
    Hive hive = Hive.get(pctx.getConf());
    // make sure each partition exists on the index table
    PrunedPartitionList queryPartitionList = pctx.getOpToPartList().get(tableScan);
    Set<Partition> queryPartitions = queryPartitionList.getPartitions();
    if (queryPartitions == null || queryPartitions.isEmpty()) {
        return null;
    }
    for (Partition part : queryPartitions) {
        if (!containsPartition(hive, part, indexes)) {
            // the index table is missing this partition, so the index cannot be used
            return null;
        }
    }
    return queryPartitions;
}
Also used : Partition(org.apache.hadoop.hive.ql.metadata.Partition) Hive(org.apache.hadoop.hive.ql.metadata.Hive) PrunedPartitionList(org.apache.hadoop.hive.ql.parse.PrunedPartitionList)
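
The null return doubles as the "not covered" signal, so callers gate any index-based rewrite on it. A sketch of the calling pattern, with tableScan, pctx, and indexes assumed to be in scope:

Set<Partition> covered = IndexUtils.checkPartitionsCoveredByIndex(tableScan, pctx, indexes);
if (covered == null) {
    // at least one scanned partition is not covered by the index: keep the original plan
    return;
}
// otherwise the scan can safely be rewritten against the index for exactly these partitions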

Example 10 with PrunedPartitionList

use of org.apache.hadoop.hive.ql.parse.PrunedPartitionList in project hive by apache.

the class DynamicPartitionPruningOptimization method process.

@Override
public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx, Object... nodeOutputs) throws SemanticException {
    ParseContext parseContext;
    if (procCtx instanceof OptimizeTezProcContext) {
        parseContext = ((OptimizeTezProcContext) procCtx).parseContext;
    } else if (procCtx instanceof OptimizeSparkProcContext) {
        parseContext = ((OptimizeSparkProcContext) procCtx).getParseContext();
    } else {
        throw new IllegalArgumentException("expected parseContext to be either " + "OptimizeTezProcContext or OptimizeSparkProcContext, but found " + procCtx.getClass().getName());
    }
    FilterOperator filter = (FilterOperator) nd;
    FilterDesc desc = filter.getConf();
    if (!parseContext.getConf().getBoolVar(ConfVars.TEZ_DYNAMIC_PARTITION_PRUNING) && !parseContext.getConf().getBoolVar(ConfVars.SPARK_DYNAMIC_PARTITION_PRUNING)) {
        // nothing to do when the optimization is off
        return null;
    }
    TableScanOperator ts = null;
    if (filter.getParentOperators().size() == 1 && filter.getParentOperators().get(0) instanceof TableScanOperator) {
        ts = (TableScanOperator) filter.getParentOperators().get(0);
    }
    if (LOG.isDebugEnabled()) {
        LOG.debug("Parent: " + filter.getParentOperators().get(0));
        LOG.debug("Filter: " + desc.getPredicateString());
        LOG.debug("TableScan: " + ts);
    }
    DynamicPartitionPrunerContext removerContext = new DynamicPartitionPrunerContext();
    // collect the dynamic pruning conditions
    removerContext.dynLists.clear();
    collectDynamicPruningConditions(desc.getPredicate(), removerContext);
    if (ts == null) {
        // Replace the synthetic predicate with true and bail out
        for (DynamicListContext ctx : removerContext) {
            ExprNodeDesc constNode = new ExprNodeConstantDesc(ctx.parent.getTypeInfo(), true);
            replaceExprNode(ctx, desc, constNode);
        }
        return false;
    }
    final boolean semiJoin = parseContext.getConf().getBoolVar(ConfVars.TEZ_DYNAMIC_SEMIJOIN_REDUCTION);
    for (DynamicListContext ctx : removerContext) {
        String column = ExprNodeDescUtils.extractColName(ctx.parent);
        boolean semiJoinAttempted = false;
        if (column != null) {
            // Need unique IDs to refer to each min/max key value in the DynamicValueRegistry
            String keyBaseAlias = "";
            Table table = ts.getConf().getTableMetadata();
            if (table != null && table.isPartitionKey(column)) {
                String columnType = table.getPartColByName(column).getType();
                String alias = ts.getConf().getAlias();
                PrunedPartitionList plist = parseContext.getPrunedPartitions(alias, ts);
                if (LOG.isDebugEnabled()) {
                    LOG.debug("alias: " + alias);
                    LOG.debug("pruned partition list: ");
                    if (plist != null) {
                        for (Partition p : plist.getPartitions()) {
                            LOG.debug(p.getCompleteName());
                        }
                    }
                }
                // only generate the pruning plan if the partition list is unknown or some
                // partitions survived static pruning; otherwise everything was filtered already
                if (plist == null || plist.getPartitions().size() != 0) {
                    LOG.info("Dynamic partitioning: " + table.getCompleteName() + "." + column);
                    generateEventOperatorPlan(ctx, parseContext, ts, column, columnType);
                } else {
                    // all partitions have been statically removed
                    LOG.debug("No partition pruning necessary.");
                }
            } else {
                LOG.debug("Column " + column + " is not a partition column");
                if (semiJoin && ts.getConf().getFilterExpr() != null) {
                    LOG.debug("Initiate semijoin reduction for " + column);
                    // Get the table name from which the min-max values will come.
                    Operator<?> op = ctx.generator;
                    while (!(op == null || op instanceof TableScanOperator)) {
                        op = op.getParentOperators().get(0);
                    }
                    String tableAlias = (op == null ? "" : ((TableScanOperator) op).getConf().getAlias());
                    keyBaseAlias = ctx.generator.getOperatorId() + "_" + tableAlias + "_" + column;
                    semiJoinAttempted = generateSemiJoinOperatorPlan(ctx, parseContext, ts, keyBaseAlias);
                }
            }
            // we always remove the condition by replacing it with "true"
            if (semiJoinAttempted) {
                List<ExprNodeDesc> betweenArgs = new ArrayList<ExprNodeDesc>();
                // Do not invert between result
                betweenArgs.add(new ExprNodeConstantDesc(Boolean.FALSE));
                // add column expression here
                betweenArgs.add(ctx.parent.getChildren().get(0));
                betweenArgs.add(new ExprNodeDynamicValueDesc(new DynamicValue(keyBaseAlias + "_min", ctx.desc.getTypeInfo())));
                betweenArgs.add(new ExprNodeDynamicValueDesc(new DynamicValue(keyBaseAlias + "_max", ctx.desc.getTypeInfo())));
                ExprNodeDesc betweenNode = ExprNodeGenericFuncDesc.newInstance(FunctionRegistry.getFunctionInfo("between").getGenericUDF(), betweenArgs);
                // add column expression for bloom filter
                List<ExprNodeDesc> bloomFilterArgs = new ArrayList<ExprNodeDesc>();
                bloomFilterArgs.add(ctx.parent.getChildren().get(0));
                bloomFilterArgs.add(new ExprNodeDynamicValueDesc(new DynamicValue(keyBaseAlias + "_bloom_filter", TypeInfoFactory.binaryTypeInfo)));
                ExprNodeDesc bloomFilterNode = ExprNodeGenericFuncDesc.newInstance(FunctionRegistry.getFunctionInfo("in_bloom_filter").getGenericUDF(), bloomFilterArgs);
                List<ExprNodeDesc> andArgs = new ArrayList<ExprNodeDesc>();
                andArgs.add(betweenNode);
                andArgs.add(bloomFilterNode);
                ExprNodeDesc andExpr = ExprNodeGenericFuncDesc.newInstance(FunctionRegistry.getFunctionInfo("and").getGenericUDF(), andArgs);
                replaceExprNode(ctx, desc, andExpr);
            } else {
                ExprNodeDesc replaceNode = new ExprNodeConstantDesc(ctx.parent.getTypeInfo(), true);
                replaceExprNode(ctx, desc, replaceNode);
            }
        } else {
            ExprNodeDesc constNode = new ExprNodeConstantDesc(ctx.parent.getTypeInfo(), true);
            replaceExprNode(ctx, desc, constNode);
        }
    }
    // if we pushed the predicate into the table scan we need to remove the
    // synthetic conditions there.
    cleanTableScanFilters(ts);
    return false;
}
Also used : Partition(org.apache.hadoop.hive.ql.metadata.Partition) Table(org.apache.hadoop.hive.ql.metadata.Table) ArrayList(java.util.ArrayList) OptimizeTezProcContext(org.apache.hadoop.hive.ql.parse.OptimizeTezProcContext) PrunedPartitionList(org.apache.hadoop.hive.ql.parse.PrunedPartitionList) ParseContext(org.apache.hadoop.hive.ql.parse.ParseContext) OptimizeSparkProcContext(org.apache.hadoop.hive.ql.parse.spark.OptimizeSparkProcContext)
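
The strings fed into DynamicValue are derived from keyBaseAlias, which is how the min/max/bloom-filter producers and this consumer agree on registry keys at runtime. A sketch of the resulting filter shape, with the operator id and alias values purely hypothetical:

// given generator operator id "RS_7", source alias "dim" and join column "ds":
String keyBaseAlias = "RS_7" + "_" + "dim" + "_" + "ds"; // "RS_7_dim_ds"
// the rewritten scan-side filter is then equivalent to:
//   ds BETWEEN DynamicValue("RS_7_dim_ds_min") AND DynamicValue("RS_7_dim_ds_max")
//   AND in_bloom_filter(ds, DynamicValue("RS_7_dim_ds_bloom_filter"))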

Aggregations

PrunedPartitionList (org.apache.hadoop.hive.ql.parse.PrunedPartitionList): 18
Partition (org.apache.hadoop.hive.ql.metadata.Partition): 14
Table (org.apache.hadoop.hive.ql.metadata.Table): 10
ArrayList (java.util.ArrayList): 9
TableScanOperator (org.apache.hadoop.hive.ql.exec.TableScanOperator): 8
SemanticException (org.apache.hadoop.hive.ql.parse.SemanticException): 6
ParseContext (org.apache.hadoop.hive.ql.parse.ParseContext): 5
HiveException (org.apache.hadoop.hive.ql.metadata.HiveException): 4
HashMap (java.util.HashMap): 3
Map (java.util.Map): 3
LinkedHashMap (java.util.LinkedHashMap): 2
List (java.util.List): 2
HiveConf (org.apache.hadoop.hive.conf.HiveConf): 2
FieldSchema (org.apache.hadoop.hive.metastore.api.FieldSchema): 2
OrcInputFormat (org.apache.hadoop.hive.ql.io.orc.OrcInputFormat): 2
ImmutableMap (com.google.common.collect.ImmutableMap): 1
HashSet (java.util.HashSet): 1
LinkedHashSet (java.util.LinkedHashSet): 1
AtomicInteger (java.util.concurrent.atomic.AtomicInteger): 1
DruidSchema (org.apache.calcite.adapter.druid.DruidSchema): 1