use of org.apache.hadoop.hive.ql.udf.generic.GenericUDFHash in project presto by prestodb.
the class HiveBucketing method getHiveBucket.
public static Optional<HiveBucket> getHiveBucket(List<Entry<ObjectInspector, Object>> columnBindings, int bucketCount)
{
    try {
        @SuppressWarnings("resource")
        GenericUDFHash udf = new GenericUDFHash();
        ObjectInspector[] objectInspectors = new ObjectInspector[columnBindings.size()];
        DeferredObject[] deferredObjects = new DeferredObject[columnBindings.size()];
        int i = 0;
        for (Entry<ObjectInspector, Object> entry : columnBindings) {
            // Convert each binding into the Java object inspector and deferred value the UDF expects
            objectInspectors[i] = getJavaObjectInspector(entry.getKey());
            deferredObjects[i] = getJavaDeferredObject(entry.getValue(), entry.getKey());
            i++;
        }
        // initialize() returns the inspector for the UDF's result, which for hash() is an int
        ObjectInspector udfInspector = udf.initialize(objectInspectors);
        IntObjectInspector inspector = (IntObjectInspector) udfInspector;
        Object result = udf.evaluate(deferredObjects);
        // Route the hash code through Hive's default partitioner to get the bucket number
        HiveKey hiveKey = new HiveKey();
        hiveKey.setHashCode(inspector.get(result));
        int bucketNumber = new DefaultHivePartitioner<>().getBucket(hiveKey, null, bucketCount);
        return Optional.of(new HiveBucket(bucketNumber, bucketCount));
    }
    catch (HiveException e) {
        log.debug(e, "Error evaluating bucket number");
        return Optional.empty();
    }
}
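For context, a minimal caller sketch (hypothetical values; Guava's ImmutableList and Maps.immutableEntry plus Hive's PrimitiveObjectInspectorFactory are assumed here for building the bindings, none of which appear in the snippet above):

import static com.google.common.collect.Maps.immutableEntry;
import static org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.javaLongObjectInspector;
import com.google.common.collect.ImmutableList;

// Bind each bucketing column's inspector to that column's value for one row (the value 42L is made up)
List<Entry<ObjectInspector, Object>> bindings = ImmutableList.of(
        immutableEntry((ObjectInspector) javaLongObjectInspector, (Object) 42L));

// An empty Optional means the bucket could not be evaluated (see the catch block above)
Optional<HiveBucket> bucket = getHiveBucket(bindings, 32);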
use of org.apache.hadoop.hive.ql.udf.generic.GenericUDFHash in project hive by apache.
the class SemanticAnalyzer method genSamplePredicate.
/**
 * Generates the sampling predicate from the TABLESAMPLE clause information.
 * If useBucketCols is set to true, this function uses the bucket column list
 * to decide the expression inputs to the predicate hash function; otherwise
 * the expression list stored in the TableSample is used. The bucket columns
 * of the table are used to generate this predicate when no expressions are
 * provided in the TABLESAMPLE clause and the table has clustering columns
 * defined in its metadata. The predicate created has the following structure:
 *
 * ((hash(expressions) & Integer.MAX_VALUE) % denominator) == numerator
 *
 * @param ts
 *          TABLESAMPLE clause information
 * @param bucketCols
 *          The clustering columns of the table
 * @param useBucketCols
 *          Flag to indicate whether the bucketCols should be used as input to
 *          the hash function
 * @param alias
 *          The alias used for the table in the row resolver
 * @param rwsch
 *          The row resolver used to resolve column references
 * @param qbm
 *          The metadata information for the query block, used to resolve
 *          unaliased columns
 * @param planExpr
 *          The plan tree for the expression. If the user specified this, the
 *          parse expressions are not used
 * @return the sampling predicate as an ExprNodeDesc
 * @exception SemanticException
 */
private ExprNodeDesc genSamplePredicate(TableSample ts, List<String> bucketCols, boolean useBucketCols,
    String alias, RowResolver rwsch, QBMetaData qbm, ExprNodeDesc planExpr) throws SemanticException {
  ExprNodeDesc numeratorExpr = new ExprNodeConstantDesc(TypeInfoFactory.intTypeInfo,
      Integer.valueOf(ts.getNumerator() - 1));
  ExprNodeDesc denominatorExpr = new ExprNodeConstantDesc(TypeInfoFactory.intTypeInfo,
      Integer.valueOf(ts.getDenominator()));
  ExprNodeDesc intMaxExpr = new ExprNodeConstantDesc(TypeInfoFactory.intTypeInfo,
      Integer.valueOf(Integer.MAX_VALUE));
  ArrayList<ExprNodeDesc> args = new ArrayList<ExprNodeDesc>();
  if (planExpr != null) {
    args.add(planExpr);
  } else if (useBucketCols) {
    for (String col : bucketCols) {
      ColumnInfo ci = rwsch.get(alias, col);
      // TODO: change type to the one in the table schema
      args.add(new ExprNodeColumnDesc(ci));
    }
  } else {
    for (ASTNode expr : ts.getExprs()) {
      args.add(genExprNodeDesc(expr, rwsch));
    }
  }
  ExprNodeDesc equalsExpr = null;
  {
    ExprNodeDesc hashfnExpr = new ExprNodeGenericFuncDesc(TypeInfoFactory.intTypeInfo,
        new GenericUDFHash(), args);
    assert (hashfnExpr != null);
    LOG.info("hashfnExpr = " + hashfnExpr);
    ExprNodeDesc andExpr = TypeCheckProcFactory.DefaultExprProcessor.getFuncExprNodeDesc("&",
        hashfnExpr, intMaxExpr);
    assert (andExpr != null);
    LOG.info("andExpr = " + andExpr);
    ExprNodeDesc modExpr = TypeCheckProcFactory.DefaultExprProcessor.getFuncExprNodeDesc("%",
        andExpr, denominatorExpr);
    assert (modExpr != null);
    LOG.info("modExpr = " + modExpr);
    LOG.info("numeratorExpr = " + numeratorExpr);
    equalsExpr = TypeCheckProcFactory.DefaultExprProcessor.getFuncExprNodeDesc("==",
        modExpr, numeratorExpr);
    LOG.info("equalsExpr = " + equalsExpr);
    assert (equalsExpr != null);
  }
  return equalsExpr;
}
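The block above assembles exactly the documented shape, one operator at a time. A plain-Java sketch of the resulting arithmetic, assuming TABLESAMPLE(BUCKET 2 OUT OF 4) and using String.hashCode as a stand-in for Hive's hash() UDF (which computes a different value):

// Illustrative only: Hive's hash() UDF is not String.hashCode, but the predicate shape is identical
int numerator = 2 - 1;  // genSamplePredicate stores ts.getNumerator() - 1, i.e. a zero-based bucket index
int denominator = 4;
int hash = "someColumnValue".hashCode();  // stand-in for hash(expressions)
boolean selected = ((hash & Integer.MAX_VALUE) % denominator) == numerator;

ANDing with Integer.MAX_VALUE clears the sign bit, so the modulus is always computed on a non-negative value and the result can never be a negative remainder.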
use of org.apache.hadoop.hive.ql.udf.generic.GenericUDFHash in project presto by prestodb.
the class TestHiveBucketing method getHiveBucket.
public static int getHiveBucket(List<Entry<ObjectInspector, Object>> columnBindings, int bucketCount)
        throws HiveException
{
    GenericUDFHash udf = new GenericUDFHash();
    ObjectInspector[] objectInspectors = new ObjectInspector[columnBindings.size()];
    GenericUDF.DeferredObject[] deferredObjects = new GenericUDF.DeferredObject[columnBindings.size()];
    int i = 0;
    for (Entry<ObjectInspector, Object> entry : columnBindings) {
        objectInspectors[i] = entry.getKey();
        if (entry.getValue() != null && entry.getKey() instanceof JavaHiveVarcharObjectInspector) {
            // The varchar inspector expects HiveVarchar objects, so wrap the raw String
            // with the column's declared maximum length before hashing
            JavaHiveVarcharObjectInspector varcharObjectInspector = (JavaHiveVarcharObjectInspector) entry.getKey();
            deferredObjects[i] = new GenericUDF.DeferredJavaObject(
                    new HiveVarchar((String) entry.getValue(), varcharObjectInspector.getMaxLength()));
        }
        else {
            deferredObjects[i] = new GenericUDF.DeferredJavaObject(entry.getValue());
        }
        i++;
    }
    ObjectInspector udfInspector = udf.initialize(objectInspectors);
    IntObjectInspector inspector = (IntObjectInspector) udfInspector;
    Object result = udf.evaluate(deferredObjects);
    HiveKey hiveKey = new HiveKey();
    hiveKey.setHashCode(inspector.get(result));
    return new DefaultHivePartitioner<>().getBucket(hiveKey, null, bucketCount);
}
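A hypothetical test call (the column value and bucket count are illustrative). Note two differences from the production variant above: the inspectors are passed through unconverted, and the helper propagates HiveException instead of returning Optional.empty():

import static com.google.common.collect.Maps.immutableEntry;
import static org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.javaIntObjectInspector;
import com.google.common.collect.ImmutableList;

int bucket = getHiveBucket(
        ImmutableList.of(immutableEntry((ObjectInspector) javaIntObjectInspector, (Object) 7)),
        16);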