Use of org.apache.hadoop.hive.ql.udf.generic.GenericUDFMurmurHash in the project "hive" by Apache.
From the class SemanticAnalyzer, method genSamplePredicate.
/**
 * Generates the sampling predicate from the TABLESAMPLE clause information.
 * This function uses the bucket column list to decide the expression inputs
 * to the predicate hash function in case useBucketCols is set to true,
 * otherwise the expression list stored in the TableSample is used. The bucket
 * columns of the table are used to generate this predicate in case no
 * expressions are provided on the TABLESAMPLE clause and the table has
 * clustering columns defined in its metadata. The predicate created has the
 * following structure:
 *
 * ((hash(expressions) &amp; Integer.MAX_VALUE) % denominator) == numerator
 *
 * @param ts
 *          TABLESAMPLE clause information
 * @param bucketCols
 *          The clustering columns of the table
 * @param useBucketCols
 *          Flag to indicate whether the bucketCols should be used as input to
 *          the hash function
 * @param alias
 *          The alias used for the table in the row resolver
 * @param rwsch
 *          The row resolver used to resolve column references
 * @param planExpr
 *          The plan tree for the expression. If the user specified this, the
 *          parse expressions are not used
 * @param bucketingVersion
 *          The table's bucketing version; version 2 hashes with Murmur,
 *          any other value uses the legacy hash function
 * @return exprNodeDesc
 * @exception SemanticException
 */
private ExprNodeDesc genSamplePredicate(TableSample ts, List<String> bucketCols,
    boolean useBucketCols, String alias, RowResolver rwsch, ExprNodeDesc planExpr,
    int bucketingVersion) throws SemanticException {
  // The clause numerator is 1-based; the predicate compares against the
  // 0-based remainder, hence the "- 1".
  ExprNodeDesc numeratorExpr = new ExprNodeConstantDesc(
      TypeInfoFactory.intTypeInfo, ts.getNumerator() - 1);
  ExprNodeDesc denominatorExpr = new ExprNodeConstantDesc(
      TypeInfoFactory.intTypeInfo, ts.getDenominator());
  ExprNodeDesc intMaxExpr = new ExprNodeConstantDesc(
      TypeInfoFactory.intTypeInfo, Integer.MAX_VALUE);

  // Hash-function inputs, in priority order: an explicit plan expression,
  // then the table's bucket columns, then the parsed TABLESAMPLE expressions.
  List<ExprNodeDesc> args = new ArrayList<>();
  if (planExpr != null) {
    args.add(planExpr);
  } else if (useBucketCols) {
    for (String col : bucketCols) {
      ColumnInfo ci = rwsch.get(alias, col);
      // TODO: change type to the one in the table schema
      args.add(new ExprNodeColumnDesc(ci));
    }
  } else {
    for (ASTNode expr : ts.getExprs()) {
      args.add(genExprNodeDesc(expr, rwsch));
    }
  }

  // Bucketing version 2 uses Murmur hash; older tables use the legacy hash.
  ExprNodeDesc hashfnExpr = new ExprNodeGenericFuncDesc(TypeInfoFactory.intTypeInfo,
      bucketingVersion == 2 ? new GenericUDFMurmurHash() : new GenericUDFHash(), args);
  LOG.info("hashfnExpr = {}", hashfnExpr);
  // Mask with Integer.MAX_VALUE so the hash is non-negative before the modulo.
  ExprNodeDesc andExpr = ExprNodeTypeCheck.getExprNodeDefaultExprProcessor()
      .getFuncExprNodeDesc("&", hashfnExpr, intMaxExpr);
  LOG.info("andExpr = {}", andExpr);
  ExprNodeDesc modExpr = ExprNodeTypeCheck.getExprNodeDefaultExprProcessor()
      .getFuncExprNodeDesc("%", andExpr, denominatorExpr);
  LOG.info("modExpr = {}", modExpr);
  LOG.info("numeratorExpr = {}", numeratorExpr);
  ExprNodeDesc equalsExpr = ExprNodeTypeCheck.getExprNodeDefaultExprProcessor()
      .getFuncExprNodeDesc("==", modExpr, numeratorExpr);
  LOG.info("equalsExpr = {}", equalsExpr);
  return equalsExpr;
}
Aggregations