Search in sources :

Example 1 with GenericUDFMurmurHash

Use of org.apache.hadoop.hive.ql.udf.generic.GenericUDFMurmurHash in the Apache Hive project.

Class SemanticAnalyzer, method genSamplePredicate.

/**
 * Builds the filter expression that implements a TABLESAMPLE clause.
 *
 * The resulting predicate has the following shape, where the numerator and
 * denominator are read from the TABLESAMPLE information:
 *
 * ((hash(inputs) & Integer.MAX_VALUE) % denominator) == (numerator - 1)
 *
 * The inputs to the hash function are selected in this order of precedence:
 * an already-built plan expression if one is supplied; otherwise the bucket
 * columns resolved through the row resolver when useBucketCols is true;
 * otherwise the expressions listed on the TABLESAMPLE clause itself.
 *
 * Each intermediate expression is logged at INFO level as it is built.
 *
 * @param ts
 *          TABLESAMPLE clause information (numerator, denominator and
 *          sampling expressions)
 * @param bucketCols
 *          The clustering columns of the table; only read when no plan
 *          expression is given and useBucketCols is true
 * @param useBucketCols
 *          Whether the bucket columns, rather than the TABLESAMPLE
 *          expressions, feed the hash function
 * @param alias
 *          The table alias used to look bucket columns up in the row resolver
 * @param rwsch
 *          The row resolver used to resolve column references
 * @param planExpr
 *          A ready-made plan expression; when non-null it is the only hash
 *          input and neither the bucket columns nor the parsed expressions
 *          are consulted
 * @param bucketingVersion
 *          Selects the hash function: 2 uses GenericUDFMurmurHash, any other
 *          value uses GenericUDFHash
 * @return the boolean sampling predicate
 * @exception SemanticException
 *              propagated from the expression-building helpers
 */
private ExprNodeDesc genSamplePredicate(TableSample ts, List<String> bucketCols, boolean useBucketCols, String alias, RowResolver rwsch, ExprNodeDesc planExpr, int bucketingVersion) throws SemanticException {
    // Constant operands of the predicate. The numerator is shifted down by one
    // before it is compared against the modulo result.
    ExprNodeDesc targetBucket = new ExprNodeConstantDesc(TypeInfoFactory.intTypeInfo, Integer.valueOf(ts.getNumerator() - 1));
    ExprNodeDesc bucketCount = new ExprNodeConstantDesc(TypeInfoFactory.intTypeInfo, Integer.valueOf(ts.getDenominator()));
    ExprNodeDesc signMask = new ExprNodeConstantDesc(TypeInfoFactory.intTypeInfo, Integer.valueOf(Integer.MAX_VALUE));

    // Collect the arguments that will be hashed.
    List<ExprNodeDesc> hashInputs = new ArrayList<>();
    if (planExpr == null) {
        if (useBucketCols) {
            for (String bucketCol : bucketCols) {
                // TODO: change type to the one in the table schema
                hashInputs.add(new ExprNodeColumnDesc(rwsch.get(alias, bucketCol)));
            }
        } else {
            for (ASTNode sampleExpr : ts.getExprs()) {
                hashInputs.add(genExprNodeDesc(sampleExpr, rwsch));
            }
        }
    } else {
        hashInputs.add(planExpr);
    }

    // hash(inputs): the hash UDF depends on the bucketing version.
    final ExprNodeDesc hashed;
    if (bucketingVersion == 2) {
        hashed = new ExprNodeGenericFuncDesc(TypeInfoFactory.intTypeInfo, new GenericUDFMurmurHash(), hashInputs);
    } else {
        hashed = new ExprNodeGenericFuncDesc(TypeInfoFactory.intTypeInfo, new GenericUDFHash(), hashInputs);
    }
    LOG.info("hashfnExpr = " + hashed);

    // hash & Integer.MAX_VALUE: clears the sign bit of the int hash.
    ExprNodeDesc masked = ExprNodeTypeCheck.getExprNodeDefaultExprProcessor().getFuncExprNodeDesc("&", hashed, signMask);
    LOG.info("andExpr = " + masked);

    // (...) % denominator
    ExprNodeDesc bucketOfRow = ExprNodeTypeCheck.getExprNodeDefaultExprProcessor().getFuncExprNodeDesc("%", masked, bucketCount);
    LOG.info("modExpr = " + bucketOfRow);
    LOG.info("numeratorExpr = " + targetBucket);

    // (...) == numerator - 1
    ExprNodeDesc samplePredicate = ExprNodeTypeCheck.getExprNodeDefaultExprProcessor().getFuncExprNodeDesc("==", bucketOfRow, targetBucket);
    LOG.info("equalsExpr = " + samplePredicate);
    return samplePredicate;
}
Also used : ExprNodeConstantDesc(org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc) GenericUDFMurmurHash(org.apache.hadoop.hive.ql.udf.generic.GenericUDFMurmurHash) ArrayList(java.util.ArrayList) ExprNodeColumnDesc(org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc) GenericUDFHash(org.apache.hadoop.hive.ql.udf.generic.GenericUDFHash) ColumnInfo(org.apache.hadoop.hive.ql.exec.ColumnInfo) ExprNodeGenericFuncDesc(org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc)

Aggregations

ArrayList (java.util.ArrayList)1 ColumnInfo (org.apache.hadoop.hive.ql.exec.ColumnInfo)1 ExprNodeColumnDesc (org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc)1 ExprNodeConstantDesc (org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc)1 ExprNodeDesc (org.apache.hadoop.hive.ql.plan.ExprNodeDesc)1 ExprNodeGenericFuncDesc (org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc)1 GenericUDFHash (org.apache.hadoop.hive.ql.udf.generic.GenericUDFHash)1 GenericUDFMurmurHash (org.apache.hadoop.hive.ql.udf.generic.GenericUDFMurmurHash)1