Use of org.apache.hadoop.hive.ql.parse.PrunedPartitionList in project hive by apache.
From the class HiveMetaStoreChecker, method checkTable.
/**
 * Check the metastore for inconsistencies, data missing in either the
 * metastore or on the dfs.
 *
 * @param dbName
 *          Name of the database
 * @param tableName
 *          Name of the table
 * @param partitions
 *          Partitions to check, if null or empty get all the partitions.
 * @param result
 *          Result object
 * @throws HiveException
 *           Failed to get required information from the metastore.
 * @throws IOException
 *           Most likely filesystem related
 * @throws MetaException
 *           Failed to get required information from the metastore.
 */
void checkTable(String dbName, String tableName,
    List<? extends Map<String, String>> partitions, CheckResult result)
    throws MetaException, IOException, HiveException {
  Table table = null;
  try {
    table = hive.getTable(dbName, tableName);
  } catch (HiveException e) {
    result.getTablesNotInMs().add(tableName);
    return;
  }

  List<Partition> parts = new ArrayList<Partition>();
  boolean findUnknownPartitions = true;

  if (table.isPartitioned()) {
    if (partitions == null || partitions.isEmpty()) {
      // no partitions specified, let's get all
      PrunedPartitionList prunedPartList =
          PartitionPruner.prune(table, null, conf, toString(), null);
      parts.addAll(prunedPartList.getPartitions());
    } else {
      // we're interested in specific partitions,
      // don't check for any others
      findUnknownPartitions = false;
      for (Map<String, String> map : partitions) {
        Partition part = hive.getPartition(table, map, false);
        if (part == null) {
          PartitionResult pr = new PartitionResult();
          pr.setTableName(tableName);
          pr.setPartitionName(Warehouse.makePartPath(map));
          result.getPartitionsNotInMs().add(pr);
        } else {
          parts.add(part);
        }
      }
    }
  }

  checkTable(table, parts, findUnknownPartitions, result);
}
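checkTable is package-private and is normally invoked from within HiveMetaStoreChecker itself. A minimal sketch of such a call is shown below; exception handling is omitted, the database and table names are illustrative, and the PartitionResult getters are assumptions that mirror the setters used above.

  // Sketch only: passing null partition specs makes checkTable verify all partitions.
  CheckResult result = new CheckResult();
  checkTable("default", "web_logs", null, result);

  // Report partitions found on the filesystem but missing from the metastore.
  // getTableName()/getPartitionName() are assumed accessors for the fields set above.
  for (PartitionResult pr : result.getPartitionsNotInMs()) {
    System.out.println("Missing from metastore: " + pr.getTableName()
        + " -> " + pr.getPartitionName());
  }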
Use of org.apache.hadoop.hive.ql.parse.PrunedPartitionList in project hive by apache.
From the class PartitionPruner, method prune.
/**
 * Get the partition list for the table that satisfies the partition pruner
 * condition.
 *
 * @param tab
 *          the table object for the alias
 * @param prunerExpr
 *          the pruner expression for the alias
 * @param conf
 *          for checking whether "strict" mode is on.
 * @param alias
 *          for generating error message only.
 * @param prunedPartitionsMap
 *          cached result for the table
 * @return the partition list for the table that satisfies the partition
 *         pruner condition.
 * @throws SemanticException
 */
public static PrunedPartitionList prune(Table tab, ExprNodeDesc prunerExpr,
    HiveConf conf, String alias, Map<String, PrunedPartitionList> prunedPartitionsMap)
    throws SemanticException {
  if (LOG.isTraceEnabled()) {
    LOG.trace("Started pruning partition");
    LOG.trace("dbname = " + tab.getDbName());
    LOG.trace("tabname = " + tab.getTableName());
    LOG.trace("prune Expression = " + (prunerExpr == null ? "" : prunerExpr));
  }

  String key = tab.getDbName() + "." + tab.getTableName() + ";";

  if (!tab.isPartitioned()) {
    // If the table is not partitioned, return empty list.
    return getAllPartsFromCacheOrServer(tab, key, false, prunedPartitionsMap);
  }

  if (!hasColumnExpr(prunerExpr)) {
    // If the "strict" mode is on, we have to provide partition pruner for each table.
    String error = StrictChecks.checkNoPartitionFilter(conf);
    if (error != null) {
      throw new SemanticException(error + " No partition predicate for Alias \""
          + alias + "\" Table \"" + tab.getTableName() + "\"");
    }
  }

  if (prunerExpr == null) {
    // In non-strict mode, when there are no predicates at all, get everything.
    return getAllPartsFromCacheOrServer(tab, key, false, prunedPartitionsMap);
  }

  Set<String> partColsUsedInFilter = new LinkedHashSet<String>();
  // Replace virtual columns with nulls. See javadoc for details.
  prunerExpr = removeNonPartCols(prunerExpr, extractPartColNames(tab), partColsUsedInFilter);
  // Remove all parts that are not partition columns. See javadoc for details.
  ExprNodeDesc compactExpr = compactExpr(prunerExpr.clone());
  String oldFilter = prunerExpr.getExprString();

  if (compactExpr == null || isBooleanExpr(compactExpr)) {
    if (isFalseExpr(compactExpr)) {
      return new PrunedPartitionList(tab, new LinkedHashSet<Partition>(0),
          new ArrayList<String>(0), false);
    }
    // For null and true values, return every partition
    return getAllPartsFromCacheOrServer(tab, key, true, prunedPartitionsMap);
  }

  if (LOG.isDebugEnabled()) {
    LOG.debug("Filter w/ compacting: " + compactExpr.getExprString()
        + "; filter w/o compacting: " + oldFilter);
  }

  key = key + compactExpr.getExprString();
  PrunedPartitionList ppList = prunedPartitionsMap.get(key);
  if (ppList != null) {
    return ppList;
  }

  ppList = getPartitionsFromServer(tab, (ExprNodeGenericFuncDesc) compactExpr, conf,
      alias, partColsUsedInFilter, oldFilter.equals(compactExpr.getExprString()));
  prunedPartitionsMap.put(key, ppList);
  return ppList;
}
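A minimal sketch of a caller, assuming a Table tab, a HiveConf conf, and an already-built ExprNodeDesc predicate over a partition column (all placeholder names); the cache map is keyed internally by "<db>.<table>;<compacted filter string>" as built inside prune.

  // Sketch only: obtain the partitions that survive pruning for alias "t1".
  Map<String, PrunedPartitionList> cache = new HashMap<String, PrunedPartitionList>();
  PrunedPartitionList pruned =
      PartitionPruner.prune(tab, partitionPredicate, conf, "t1", cache);

  for (Partition p : pruned.getPartitions()) {
    System.out.println("surviving partition: " + p.getCompleteName());
  }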
Use of org.apache.hadoop.hive.ql.parse.PrunedPartitionList in project hive by apache.
From the class PartitionPruner, method getAllPartsFromCacheOrServer.
private static PrunedPartitionList getAllPartsFromCacheOrServer(Table tab, String key,
    boolean unknownPartitions, Map<String, PrunedPartitionList> partsCache)
    throws SemanticException {
  PrunedPartitionList ppList = partsCache == null ? null : partsCache.get(key);
  if (ppList != null) {
    return ppList;
  }
  Set<Partition> parts;
  try {
    parts = getAllPartitions(tab);
  } catch (HiveException e) {
    throw new SemanticException(e);
  }
  ppList = new PrunedPartitionList(tab, parts, null, unknownPartitions);
  if (partsCache != null) {
    partsCache.put(key, ppList);
  }
  return ppList;
}
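For reference, the PrunedPartitionList constructor used here takes the source table, the partition set, the list of partition columns referred to by the filter (null when no filter was applied), and a boolean the callers name unknownPartitions, which appears to indicate that the list is not guaranteed to match the original predicate exactly. A minimal construction mirroring the helper, with placeholder objects, might look like:

  // Sketch only: "tab" and "allParts" are placeholders supplied by the caller.
  // No filter was applied, so no partition columns are referred to (null) and
  // the list is not marked as containing unknown partitions (false).
  Set<Partition> allParts = new LinkedHashSet<Partition>();
  PrunedPartitionList everything = new PrunedPartitionList(tab, allParts, null, false);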
Use of org.apache.hadoop.hive.ql.parse.PrunedPartitionList in project hive by apache.
From the class IndexUtils, method checkPartitionsCoveredByIndex.
/**
 * Check the partitions used by the table scan to make sure they also exist in the
 * index table.
 * @param tableScan
 * @param pctx
 * @param indexes
 * @return partitions used by query. null if they do not exist in index table
 * @throws HiveException
 */
public static Set<Partition> checkPartitionsCoveredByIndex(TableScanOperator tableScan,
    ParseContext pctx, List<Index> indexes) throws HiveException {
  Hive hive = Hive.get(pctx.getConf());

  // make sure each partition exists on the index table
  PrunedPartitionList queryPartitionList = pctx.getOpToPartList().get(tableScan);
  Set<Partition> queryPartitions = queryPartitionList.getPartitions();
  if (queryPartitions == null || queryPartitions.isEmpty()) {
    return null;
  }

  for (Partition part : queryPartitions) {
    if (!containsPartition(hive, part, indexes)) {
      // problem if it doesn't contain the partition
      return null;
    }
  }

  return queryPartitions;
}
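A sketch of how a caller might use the result (names such as candidateIndexes are illustrative only): a null return means at least one query partition is not covered, so the index-based rewrite should be skipped.

  // Sketch only: proceed with the index rewrite only when every partition is covered.
  Set<Partition> covered =
      IndexUtils.checkPartitionsCoveredByIndex(tableScan, pctx, candidateIndexes);
  if (covered == null) {
    // Some partition is missing from the index table (or none are used); skip the optimization.
    return;
  }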
Use of org.apache.hadoop.hive.ql.parse.PrunedPartitionList in project hive by apache.
From the class DynamicPartitionPruningOptimization, method process.
@Override
public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
    Object... nodeOutputs) throws SemanticException {
  ParseContext parseContext;
  if (procCtx instanceof OptimizeTezProcContext) {
    parseContext = ((OptimizeTezProcContext) procCtx).parseContext;
  } else if (procCtx instanceof OptimizeSparkProcContext) {
    parseContext = ((OptimizeSparkProcContext) procCtx).getParseContext();
  } else {
    throw new IllegalArgumentException("expected parseContext to be either "
        + "OptimizeTezProcContext or OptimizeSparkProcContext, but found "
        + procCtx.getClass().getName());
  }

  FilterOperator filter = (FilterOperator) nd;
  FilterDesc desc = filter.getConf();

  if (!parseContext.getConf().getBoolVar(ConfVars.TEZ_DYNAMIC_PARTITION_PRUNING)
      && !parseContext.getConf().getBoolVar(ConfVars.SPARK_DYNAMIC_PARTITION_PRUNING)) {
    // nothing to do when the optimization is off
    return null;
  }

  TableScanOperator ts = null;
  if (filter.getParentOperators().size() == 1
      && filter.getParentOperators().get(0) instanceof TableScanOperator) {
    ts = (TableScanOperator) filter.getParentOperators().get(0);
  }

  if (LOG.isDebugEnabled()) {
    LOG.debug("Parent: " + filter.getParentOperators().get(0));
    LOG.debug("Filter: " + desc.getPredicateString());
    LOG.debug("TableScan: " + ts);
  }

  DynamicPartitionPrunerContext removerContext = new DynamicPartitionPrunerContext();

  // collect the dynamic pruning conditions
  removerContext.dynLists.clear();
  collectDynamicPruningConditions(desc.getPredicate(), removerContext);

  if (ts == null) {
    // Replace the synthetic predicate with true and bail out
    for (DynamicListContext ctx : removerContext) {
      ExprNodeDesc constNode = new ExprNodeConstantDesc(ctx.parent.getTypeInfo(), true);
      replaceExprNode(ctx, desc, constNode);
    }
    return false;
  }

  final boolean semiJoin =
      parseContext.getConf().getBoolVar(ConfVars.TEZ_DYNAMIC_SEMIJOIN_REDUCTION);

  for (DynamicListContext ctx : removerContext) {
    String column = ExprNodeDescUtils.extractColName(ctx.parent);
    boolean semiJoinAttempted = false;

    if (column != null) {
      // Need unique IDs to refer to each min/max key value in the DynamicValueRegistry
      String keyBaseAlias = "";

      Table table = ts.getConf().getTableMetadata();

      if (table != null && table.isPartitionKey(column)) {
        String columnType = table.getPartColByName(column).getType();
        String alias = ts.getConf().getAlias();
        PrunedPartitionList plist = parseContext.getPrunedPartitions(alias, ts);
        if (LOG.isDebugEnabled()) {
          LOG.debug("alias: " + alias);
          LOG.debug("pruned partition list: ");
          if (plist != null) {
            for (Partition p : plist.getPartitions()) {
              LOG.debug(p.getCompleteName());
            }
          }
        }
        // have been already filtered
        if (plist == null || plist.getPartitions().size() != 0) {
          LOG.info("Dynamic partitioning: " + table.getCompleteName() + "." + column);
          generateEventOperatorPlan(ctx, parseContext, ts, column, columnType);
        } else {
          // all partitions have been statically removed
          LOG.debug("No partition pruning necessary.");
        }
      } else {
        LOG.debug("Column " + column + " is not a partition column");
        if (semiJoin && ts.getConf().getFilterExpr() != null) {
          LOG.debug("Initiate semijoin reduction for " + column);
          // Get the table name from which the min-max values will come.
          Operator<?> op = ctx.generator;
          while (!(op == null || op instanceof TableScanOperator)) {
            op = op.getParentOperators().get(0);
          }
          String tableAlias = (op == null ? "" : ((TableScanOperator) op).getConf().getAlias());
          keyBaseAlias = ctx.generator.getOperatorId() + "_" + tableAlias + "_" + column;

          semiJoinAttempted = generateSemiJoinOperatorPlan(ctx, parseContext, ts, keyBaseAlias);
        }
      }

      // we always remove the condition by replacing it with "true"
      if (semiJoinAttempted) {
        List<ExprNodeDesc> betweenArgs = new ArrayList<ExprNodeDesc>();
        // Do not invert between result
        betweenArgs.add(new ExprNodeConstantDesc(Boolean.FALSE));
        // add column expression here
        betweenArgs.add(ctx.parent.getChildren().get(0));
        betweenArgs.add(new ExprNodeDynamicValueDesc(
            new DynamicValue(keyBaseAlias + "_min", ctx.desc.getTypeInfo())));
        betweenArgs.add(new ExprNodeDynamicValueDesc(
            new DynamicValue(keyBaseAlias + "_max", ctx.desc.getTypeInfo())));
        ExprNodeDesc betweenNode = ExprNodeGenericFuncDesc.newInstance(
            FunctionRegistry.getFunctionInfo("between").getGenericUDF(), betweenArgs);
        // add column expression for bloom filter
        List<ExprNodeDesc> bloomFilterArgs = new ArrayList<ExprNodeDesc>();
        bloomFilterArgs.add(ctx.parent.getChildren().get(0));
        bloomFilterArgs.add(new ExprNodeDynamicValueDesc(
            new DynamicValue(keyBaseAlias + "_bloom_filter", TypeInfoFactory.binaryTypeInfo)));
        ExprNodeDesc bloomFilterNode = ExprNodeGenericFuncDesc.newInstance(
            FunctionRegistry.getFunctionInfo("in_bloom_filter").getGenericUDF(), bloomFilterArgs);
        List<ExprNodeDesc> andArgs = new ArrayList<ExprNodeDesc>();
        andArgs.add(betweenNode);
        andArgs.add(bloomFilterNode);
        ExprNodeDesc andExpr = ExprNodeGenericFuncDesc.newInstance(
            FunctionRegistry.getFunctionInfo("and").getGenericUDF(), andArgs);
        replaceExprNode(ctx, desc, andExpr);
      } else {
        ExprNodeDesc replaceNode = new ExprNodeConstantDesc(ctx.parent.getTypeInfo(), true);
        replaceExprNode(ctx, desc, replaceNode);
      }
    } else {
      ExprNodeDesc constNode = new ExprNodeConstantDesc(ctx.parent.getTypeInfo(), true);
      replaceExprNode(ctx, desc, constNode);
    }
  }

  // if we pushed the predicate into the table scan we need to remove the
  // synthetic conditions there.
  cleanTableScanFilters(ts);
  return false;
}
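A minimal sketch of the PrunedPartitionList lookup the optimizer performs at a table scan, with parseContext and ts as placeholders for the objects used above: if static pruning already removed every partition, dynamic pruning adds nothing.

  // Sketch only: fetch the cached pruned partition list for this table scan's alias.
  String alias = ts.getConf().getAlias();
  PrunedPartitionList plist = parseContext.getPrunedPartitions(alias, ts);

  if (plist != null && plist.getPartitions().isEmpty()) {
    // Every partition was removed by static pruning; skip dynamic pruning for this scan.
  }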