use of org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc in project hive by apache.
the class SemiJoinReductionMerge method createSemiJoinPredicate.
/**
* Creates the multi-column semi-join predicate that is applied on the target relation.
*
 * Assuming that the target columns of the semi-join are fname, lname, and age, the generated predicate is:
* <pre>
* fname BETWEEN ?min_fname AND ?max_fname and
* lname BETWEEN ?min_lname AND ?max_lname and
* age BETWEEN ?min_age AND ?max_age and
* IN_BLOOM_FILTER(HASH(fname,lname,age),?bloom_filter)
* </pre>
* where the question mark (?) indicates dynamic values bound at runtime.
*/
private static ExprNodeGenericFuncDesc createSemiJoinPredicate(List<ReduceSinkOperator> sjBranches, RuntimeValuesInfo sjValueInfo, ParseContext context) {
  // Performance note: to speed up evaluation, the 'BETWEEN' predicates should come before the 'IN_BLOOM_FILTER' one
  Deque<String> dynamicIds = new ArrayDeque<>(sjValueInfo.getDynamicValueIDs());
  List<ExprNodeDesc> sjPredicates = new ArrayList<>();
  List<ExprNodeDesc> hashArgs = new ArrayList<>();
  for (ReduceSinkOperator rs : sjBranches) {
    RuntimeValuesInfo info = context.getRsToRuntimeValuesInfoMap().get(rs);
    checkState(info.getTargetColumns().size() == 1, "Cannot handle multi-column semijoin branches.");
    final ExprNodeDesc targetColumn = info.getTargetColumns().get(0);
    TypeInfo typeInfo = targetColumn.getTypeInfo();
    DynamicValue minDynamic = new DynamicValue(dynamicIds.poll(), typeInfo);
    DynamicValue maxDynamic = new DynamicValue(dynamicIds.poll(), typeInfo);
    List<ExprNodeDesc> betweenArgs = Arrays.asList(
        // The first argument is false so the BETWEEN result is not inverted
        new ExprNodeConstantDesc(Boolean.FALSE), targetColumn, new ExprNodeDynamicValueDesc(minDynamic), new ExprNodeDynamicValueDesc(maxDynamic));
    ExprNodeDesc betweenExp = new ExprNodeGenericFuncDesc(TypeInfoFactory.booleanTypeInfo, new GenericUDFBetween(), "between", betweenArgs);
    sjPredicates.add(betweenExp);
    hashArgs.add(targetColumn);
  }
  ExprNodeDesc hashExp = ExprNodeDescUtils.murmurHash(hashArgs);
  assert dynamicIds.size() == 1 : "There should be one dynamic value left untreated: the one for the bloom filter";
  DynamicValue bloomDynamic = new DynamicValue(dynamicIds.poll(), TypeInfoFactory.binaryTypeInfo);
  sjPredicates.add(new ExprNodeGenericFuncDesc(TypeInfoFactory.booleanTypeInfo, new GenericUDFInBloomFilter(), "in_bloom_filter",
      Arrays.asList(hashExp, new ExprNodeDynamicValueDesc(bloomDynamic))));
  return and(sjPredicates);
}
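The and(...) helper called on the last line is not part of the snippet. A minimal sketch of how such a conjunction could be assembled, reusing the same ExprNodeGenericFuncDesc constructor seen above together with the standard GenericUDFOPAnd UDF (the helper itself is an assumption, not the actual Hive implementation):

// Hypothetical sketch of the and(...) helper: wraps the collected predicates
// in a single boolean AND expression.
private static ExprNodeGenericFuncDesc and(List<ExprNodeDesc> predicates) {
  return new ExprNodeGenericFuncDesc(TypeInfoFactory.booleanTypeInfo, new GenericUDFOPAnd(), "and", predicates);
}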
use of org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc in project hive by apache.
the class SemiJoinReductionMerge method transform.
public ParseContext transform(ParseContext parseContext) throws SemanticException {
  Map<ReduceSinkOperator, SemiJoinBranchInfo> allSemijoins = parseContext.getRsToSemiJoinBranchInfo();
  if (allSemijoins.isEmpty()) {
    return parseContext;
  }
  HiveConf hiveConf = parseContext.getConf();
  for (Entry<SJSourceTarget, List<ReduceSinkOperator>> sjMergeCandidate : createMergeCandidates(allSemijoins)) {
    final List<ReduceSinkOperator> sjBranches = sjMergeCandidate.getValue();
    if (sjBranches.size() < 2) {
      continue;
    }
    List<SelectOperator> selOps = new ArrayList<>(sjBranches.size());
    for (ReduceSinkOperator rs : sjBranches) {
      selOps.add(OperatorUtils.ancestor(rs, SelectOperator.class, 0, 0, 0, 0));
    }
    long sjEntriesHint = extractBloomEntriesHint(sjBranches);
    SelectOperator selectOp = mergeSelectOps(sjMergeCandidate.getKey().source, selOps);
    GroupByOperator gbPartialOp = createGroupBy(selectOp, selectOp, GroupByDesc.Mode.HASH, sjEntriesHint, hiveConf);
    ReduceSinkOperator rsPartialOp = createReduceSink(gbPartialOp, NullOrdering.defaultNullOrder(hiveConf));
    rsPartialOp.getConf().setReducerTraits(EnumSet.of(ReduceSinkDesc.ReducerTraits.QUICKSTART));
    GroupByOperator gbCompleteOp = createGroupBy(selectOp, rsPartialOp, GroupByDesc.Mode.FINAL, sjEntriesHint, hiveConf);
    ReduceSinkOperator rsCompleteOp = createReduceSink(gbCompleteOp, NullOrdering.defaultNullOrder(hiveConf));
    final TableScanOperator sjTargetTable = sjMergeCandidate.getKey().target;
    SemiJoinBranchInfo sjInfo = new SemiJoinBranchInfo(sjTargetTable, false);
    parseContext.getRsToSemiJoinBranchInfo().put(rsCompleteOp, sjInfo);
    // Save the info that is required at query time to resolve dynamic/runtime values.
    RuntimeValuesInfo valuesInfo = createRuntimeValuesInfo(rsCompleteOp, sjBranches, parseContext);
    parseContext.getRsToRuntimeValuesInfoMap().put(rsCompleteOp, valuesInfo);
    ExprNodeGenericFuncDesc sjPredicate = createSemiJoinPredicate(sjBranches, valuesInfo, parseContext);
    // Update filter operators with the new semi-join predicate
    for (Operator<?> op : sjTargetTable.getChildOperators()) {
      if (op instanceof FilterOperator) {
        FilterDesc filter = ((FilterOperator) op).getConf();
        filter.setPredicate(and(filter.getPredicate(), sjPredicate));
      }
    }
    // Update tableScan with the new semi-join predicate
    sjTargetTable.getConf().setFilterExpr(and(sjTargetTable.getConf().getFilterExpr(), sjPredicate));
    for (ReduceSinkOperator rs : sjBranches) {
      GenTezUtils.removeSemiJoinOperator(parseContext, rs, sjTargetTable);
      GenTezUtils.removeBranch(rs);
    }
  }
  return parseContext;
}
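createMergeCandidates is not shown in the snippet. Purely as an illustration of the grouping it performs, here is a hypothetical sketch that buckets semi-join branches by their source/target pair; the SJSourceTarget constructor, the sourceOf helper, and the map layout are assumptions for illustration, not the actual Hive code:

// Hypothetical sketch: branches sharing the same source and target table scan are merge candidates.
private static Set<Entry<SJSourceTarget, List<ReduceSinkOperator>>> createMergeCandidates(
    Map<ReduceSinkOperator, SemiJoinBranchInfo> semijoins) {
  Map<SJSourceTarget, List<ReduceSinkOperator>> candidates = new LinkedHashMap<>();
  for (Entry<ReduceSinkOperator, SemiJoinBranchInfo> e : semijoins.entrySet()) {
    // sourceOf(...) stands for "the operator feeding this semi-join branch" (assumed helper)
    SJSourceTarget key = new SJSourceTarget(sourceOf(e.getKey()), e.getValue().getTsOp());
    candidates.computeIfAbsent(key, k -> new ArrayList<>()).add(e.getKey());
  }
  return candidates.entrySet();
}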
use of org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc in project hive by apache.
the class HiveRexExecutorImpl method reduce.
@Override
public void reduce(RexBuilder rexBuilder, List<RexNode> constExps, List<RexNode> reducedValues) {
  RexNodeConverter rexNodeConverter = new RexNodeConverter(rexBuilder, rexBuilder.getTypeFactory());
  for (RexNode rexNode : constExps) {
    // initialize the converter
    ExprNodeConverter converter = new ExprNodeConverter("", null, null, null, new HashSet<>(), rexBuilder);
    // convert RexNode to ExprNodeGenericFuncDesc
    ExprNodeDesc expr = rexNode.accept(converter);
    if (expr instanceof ExprNodeGenericFuncDesc) {
      // folding the constant
      ExprNodeDesc constant = ConstantPropagateProcFactory.foldExpr((ExprNodeGenericFuncDesc) expr);
      if (constant != null) {
        addExpressionToList(constant, rexNode, rexNodeConverter, reducedValues);
      } else {
        reducedValues.add(rexNode);
      }
    } else if (expr instanceof ExprNodeConstantDesc) {
      addExpressionToList(expr, rexNode, rexNodeConverter, reducedValues);
    } else {
      reducedValues.add(rexNode);
    }
  }
}
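addExpressionToList is a private helper of HiveRexExecutorImpl and is not included above. A hedged sketch of what it plausibly does: translate the folded Hive constant back into Calcite's RexNode space and fall back to the original expression if that fails (the convert call and the exception handling are assumptions, not the verified Hive source):

// Plausible sketch: add the reduced expression if it is a constant that converts
// back to a RexNode, otherwise keep the original, unreduced expression.
private void addExpressionToList(ExprNodeDesc reducedExpr, RexNode originalExpr,
    RexNodeConverter rexNodeConverter, List<RexNode> reducedValues) {
  if (reducedExpr instanceof ExprNodeConstantDesc) {
    try {
      reducedValues.add(rexNodeConverter.convert((ExprNodeConstantDesc) reducedExpr));
      return;
    } catch (Exception e) {
      // fall through and keep the original expression
    }
  }
  reducedValues.add(originalExpr);
}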
use of org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc in project hive by apache.
the class SemanticAnalyzer method genSamplePredicate.
/**
* Generates the sampling predicate from the TABLESAMPLE clause information.
 * If useBucketCols is true, the bucket column list supplies the inputs to the
 * predicate's hash function; otherwise the expression list stored in the
 * TableSample is used. The bucket columns of the table are used when the
 * TABLESAMPLE clause provides no expressions and the table has clustering
 * columns defined in its metadata. The generated predicate has the following
 * structure:
*
* ((hash(expressions) & Integer.MAX_VALUE) % denominator) == numerator
*
* @param ts
* TABLESAMPLE clause information
* @param bucketCols
* The clustering columns of the table
* @param useBucketCols
* Flag to indicate whether the bucketCols should be used as input to
* the hash function
* @param alias
* The alias used for the table in the row resolver
* @param rwsch
* The row resolver used to resolve column references
* @param planExpr
* The plan tree for the expression. If the user specified this, the
* parse expressions are not used
* @return exprNodeDesc
* @exception SemanticException
*/
private ExprNodeDesc genSamplePredicate(TableSample ts, List<String> bucketCols, boolean useBucketCols, String alias,
    RowResolver rwsch, ExprNodeDesc planExpr, int bucketingVersion) throws SemanticException {
  ExprNodeDesc numeratorExpr = new ExprNodeConstantDesc(TypeInfoFactory.intTypeInfo, Integer.valueOf(ts.getNumerator() - 1));
  ExprNodeDesc denominatorExpr = new ExprNodeConstantDesc(TypeInfoFactory.intTypeInfo, Integer.valueOf(ts.getDenominator()));
  ExprNodeDesc intMaxExpr = new ExprNodeConstantDesc(TypeInfoFactory.intTypeInfo, Integer.valueOf(Integer.MAX_VALUE));
  List<ExprNodeDesc> args = new ArrayList<ExprNodeDesc>();
  if (planExpr != null) {
    args.add(planExpr);
  } else if (useBucketCols) {
    for (String col : bucketCols) {
      ColumnInfo ci = rwsch.get(alias, col);
      // TODO: change type to the one in the table schema
      args.add(new ExprNodeColumnDesc(ci));
    }
  } else {
    for (ASTNode expr : ts.getExprs()) {
      args.add(genExprNodeDesc(expr, rwsch));
    }
  }
  ExprNodeDesc equalsExpr = null;
  {
    ExprNodeDesc hashfnExpr = new ExprNodeGenericFuncDesc(TypeInfoFactory.intTypeInfo,
        bucketingVersion == 2 ? new GenericUDFMurmurHash() : new GenericUDFHash(), args);
    LOG.info("hashfnExpr = " + hashfnExpr);
    ExprNodeDesc andExpr = ExprNodeTypeCheck.getExprNodeDefaultExprProcessor().getFuncExprNodeDesc("&", hashfnExpr, intMaxExpr);
    LOG.info("andExpr = " + andExpr);
    ExprNodeDesc modExpr = ExprNodeTypeCheck.getExprNodeDefaultExprProcessor().getFuncExprNodeDesc("%", andExpr, denominatorExpr);
    LOG.info("modExpr = " + modExpr);
    LOG.info("numeratorExpr = " + numeratorExpr);
    equalsExpr = ExprNodeTypeCheck.getExprNodeDefaultExprProcessor().getFuncExprNodeDesc("==", modExpr, numeratorExpr);
    LOG.info("equalsExpr = " + equalsExpr);
  }
  return equalsExpr;
}
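To make the generated predicate concrete, a small standalone sketch of the arithmetic it encodes for TABLESAMPLE(BUCKET 3 OUT OF 32): the numerator stored in the predicate is 3 - 1 = 2, so a row is selected when its hash lands in zero-based bucket 2 (the hash value below is made up for illustration):

// Worked example of the sampling predicate for TABLESAMPLE(BUCKET 3 OUT OF 32)
int numerator = 3 - 1;    // buckets are matched zero-based, hence numerator - 1
int denominator = 32;
int hash = 66;            // made-up hash(expressions) value for one row
boolean selected = ((hash & Integer.MAX_VALUE) % denominator) == numerator;  // 66 % 32 == 2 -> true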
use of org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc in project hive by apache.
the class TestHiveMetaStoreClientApiArgumentsChecker method testGetPartitionsByExpr.
@Test
public void testGetPartitionsByExpr() throws HiveException, TException {
  List<Partition> partitions = new ArrayList<>();
  ExprNodeDesc column = new ExprNodeColumnDesc(TypeInfoFactory.stringTypeInfo, "rid", null, false);
  ExprNodeDesc constant = new ExprNodeConstantDesc(TypeInfoFactory.stringTypeInfo, "f");
  List<ExprNodeDesc> children = Lists.newArrayList();
  children.add(column);
  children.add(constant);
  ExprNodeGenericFuncDesc node = new ExprNodeGenericFuncDesc(TypeInfoFactory.stringTypeInfo, new GenericUDFOPEqualOrGreaterThan(), children);
  hive.getPartitionsByExpr(t, node, hive.getConf(), partitions);
}
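For comparison, the same rid >= 'f' filter could plausibly be built with the ExprNodeGenericFuncDesc.newInstance factory, which initializes the UDF and derives the return type from it instead of taking the type explicitly. This is a hedged sketch; verify the factory's exact signature against the Hive version in use:

// Sketch: let the factory initialize the comparison UDF and infer its boolean return type
ExprNodeGenericFuncDesc node2 = ExprNodeGenericFuncDesc.newInstance(
    new GenericUDFOPEqualOrGreaterThan(), Lists.newArrayList(column, constant));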