Search in sources :

Example 1 with GenericUDFHash

use of org.apache.hadoop.hive.ql.udf.generic.GenericUDFHash in project presto by prestodb.

the class HiveBucketing method getHiveBucket.

public static Optional<HiveBucket> getHiveBucket(List<Entry<ObjectInspector, Object>> columnBindings, int bucketCount) {
    try {
        @SuppressWarnings("resource") GenericUDFHash udf = new GenericUDFHash();
        ObjectInspector[] objectInspectors = new ObjectInspector[columnBindings.size()];
        DeferredObject[] deferredObjects = new DeferredObject[columnBindings.size()];
        int i = 0;
        for (Entry<ObjectInspector, Object> entry : columnBindings) {
            objectInspectors[i] = getJavaObjectInspector(entry.getKey());
            deferredObjects[i] = getJavaDeferredObject(entry.getValue(), entry.getKey());
            i++;
        }
        ObjectInspector udfInspector = udf.initialize(objectInspectors);
        IntObjectInspector inspector = (IntObjectInspector) udfInspector;
        Object result = udf.evaluate(deferredObjects);
        HiveKey hiveKey = new HiveKey();
        hiveKey.setHashCode(inspector.get(result));
        int bucketNumber = new DefaultHivePartitioner<>().getBucket(hiveKey, null, bucketCount);
        return Optional.of(new HiveBucket(bucketNumber, bucketCount));
    } catch (HiveException e) {
        log.debug(e, "Error evaluating bucket number");
        return Optional.empty();
    }
}
Also used : PrimitiveObjectInspectorFactory.javaByteObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.javaByteObjectInspector) PrimitiveObjectInspectorFactory.javaLongObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.javaLongObjectInspector) ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) PrimitiveObjectInspectorFactory.javaIntObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.javaIntObjectInspector) PrimitiveObjectInspectorFactory.javaBooleanObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.javaBooleanObjectInspector) IntObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.IntObjectInspector) PrimitiveObjectInspectorFactory.javaShortObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.javaShortObjectInspector) PrimitiveObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector) PrimitiveObjectInspectorFactory.javaStringObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.javaStringObjectInspector) PrimitiveObjectInspectorFactory.javaIntObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.javaIntObjectInspector) IntObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.IntObjectInspector) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) DeferredObject(org.apache.hadoop.hive.ql.udf.generic.GenericUDF.DeferredObject) HiveKey(org.apache.hadoop.hive.ql.io.HiveKey) GenericUDFHash(org.apache.hadoop.hive.ql.udf.generic.GenericUDFHash) DeferredJavaObject(org.apache.hadoop.hive.ql.udf.generic.GenericUDF.DeferredJavaObject) DeferredObject(org.apache.hadoop.hive.ql.udf.generic.GenericUDF.DeferredObject)

Example 2 with GenericUDFHash

use of org.apache.hadoop.hive.ql.udf.generic.GenericUDFHash in project hive by apache.

the class SemanticAnalyzer method genSamplePredicate.

/**
   * Generates the sampling predicate from the TABLESAMPLE clause information.
   * This function uses the bucket column list to decide the expression inputs
   * to the predicate hash function in case useBucketCols is set to true,
   * otherwise the expression list stored in the TableSample is used. The bucket
   * columns of the table are used to generate this predicate in case no
   * expressions are provided on the TABLESAMPLE clause and the table has
   * clustering columns defined in it's metadata. The predicate created has the
   * following structure:
   *
   * ((hash(expressions) & Integer.MAX_VALUE) % denominator) == numerator
   *
   * @param ts
   *          TABLESAMPLE clause information
   * @param bucketCols
   *          The clustering columns of the table
   * @param useBucketCols
   *          Flag to indicate whether the bucketCols should be used as input to
   *          the hash function
   * @param alias
   *          The alias used for the table in the row resolver
   * @param rwsch
   *          The row resolver used to resolve column references
   * @param qbm
   *          The metadata information for the query block which is used to
   *          resolve unaliased columns
   * @param planExpr
   *          The plan tree for the expression. If the user specified this, the
   *          parse expressions are not used
   * @return exprNodeDesc
   * @exception SemanticException
   */
private ExprNodeDesc genSamplePredicate(TableSample ts, List<String> bucketCols, boolean useBucketCols, String alias, RowResolver rwsch, QBMetaData qbm, ExprNodeDesc planExpr) throws SemanticException {
    ExprNodeDesc numeratorExpr = new ExprNodeConstantDesc(TypeInfoFactory.intTypeInfo, Integer.valueOf(ts.getNumerator() - 1));
    ExprNodeDesc denominatorExpr = new ExprNodeConstantDesc(TypeInfoFactory.intTypeInfo, Integer.valueOf(ts.getDenominator()));
    ExprNodeDesc intMaxExpr = new ExprNodeConstantDesc(TypeInfoFactory.intTypeInfo, Integer.valueOf(Integer.MAX_VALUE));
    ArrayList<ExprNodeDesc> args = new ArrayList<ExprNodeDesc>();
    if (planExpr != null) {
        args.add(planExpr);
    } else if (useBucketCols) {
        for (String col : bucketCols) {
            ColumnInfo ci = rwsch.get(alias, col);
            // TODO: change type to the one in the table schema
            args.add(new ExprNodeColumnDesc(ci));
        }
    } else {
        for (ASTNode expr : ts.getExprs()) {
            args.add(genExprNodeDesc(expr, rwsch));
        }
    }
    ExprNodeDesc equalsExpr = null;
    {
        ExprNodeDesc hashfnExpr = new ExprNodeGenericFuncDesc(TypeInfoFactory.intTypeInfo, new GenericUDFHash(), args);
        assert (hashfnExpr != null);
        LOG.info("hashfnExpr = " + hashfnExpr);
        ExprNodeDesc andExpr = TypeCheckProcFactory.DefaultExprProcessor.getFuncExprNodeDesc("&", hashfnExpr, intMaxExpr);
        assert (andExpr != null);
        LOG.info("andExpr = " + andExpr);
        ExprNodeDesc modExpr = TypeCheckProcFactory.DefaultExprProcessor.getFuncExprNodeDesc("%", andExpr, denominatorExpr);
        assert (modExpr != null);
        LOG.info("modExpr = " + modExpr);
        LOG.info("numeratorExpr = " + numeratorExpr);
        equalsExpr = TypeCheckProcFactory.DefaultExprProcessor.getFuncExprNodeDesc("==", modExpr, numeratorExpr);
        LOG.info("equalsExpr = " + equalsExpr);
        assert (equalsExpr != null);
    }
    return equalsExpr;
}
Also used : ExprNodeConstantDesc(org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc) ArrayList(java.util.ArrayList) ExprNodeColumnDesc(org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc) GenericUDFHash(org.apache.hadoop.hive.ql.udf.generic.GenericUDFHash) ColumnInfo(org.apache.hadoop.hive.ql.exec.ColumnInfo) ExprNodeGenericFuncDesc(org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc)

Example 3 with GenericUDFHash

use of org.apache.hadoop.hive.ql.udf.generic.GenericUDFHash in project presto by prestodb.

the class TestHiveBucketing method getHiveBucket.

public static int getHiveBucket(List<Entry<ObjectInspector, Object>> columnBindings, int bucketCount) throws HiveException {
    GenericUDFHash udf = new GenericUDFHash();
    ObjectInspector[] objectInspectors = new ObjectInspector[columnBindings.size()];
    GenericUDF.DeferredObject[] deferredObjects = new GenericUDF.DeferredObject[columnBindings.size()];
    int i = 0;
    for (Entry<ObjectInspector, Object> entry : columnBindings) {
        objectInspectors[i] = entry.getKey();
        if (entry.getValue() != null && entry.getKey() instanceof JavaHiveVarcharObjectInspector) {
            JavaHiveVarcharObjectInspector varcharObjectInspector = (JavaHiveVarcharObjectInspector) entry.getKey();
            deferredObjects[i] = new GenericUDF.DeferredJavaObject(new HiveVarchar(((String) entry.getValue()), varcharObjectInspector.getMaxLength()));
        } else {
            deferredObjects[i] = new GenericUDF.DeferredJavaObject(entry.getValue());
        }
        i++;
    }
    ObjectInspector udfInspector = udf.initialize(objectInspectors);
    IntObjectInspector inspector = (IntObjectInspector) udfInspector;
    Object result = udf.evaluate(deferredObjects);
    HiveKey hiveKey = new HiveKey();
    hiveKey.setHashCode(inspector.get(result));
    return new DefaultHivePartitioner<>().getBucket(hiveKey, null, bucketCount);
}
Also used : JavaHiveVarcharObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.JavaHiveVarcharObjectInspector) PrimitiveObjectInspectorFactory.javaBooleanObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.javaBooleanObjectInspector) PrimitiveObjectInspectorFactory.javaLongObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.javaLongObjectInspector) IntObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.IntObjectInspector) ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) PrimitiveObjectInspectorFactory.javaStringObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.javaStringObjectInspector) IntObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.IntObjectInspector) HiveVarchar(org.apache.hadoop.hive.common.type.HiveVarchar) JavaHiveVarcharObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.JavaHiveVarcharObjectInspector) GenericUDF(org.apache.hadoop.hive.ql.udf.generic.GenericUDF) HiveKey(org.apache.hadoop.hive.ql.io.HiveKey) GenericUDFHash(org.apache.hadoop.hive.ql.udf.generic.GenericUDFHash)

Aggregations

GenericUDFHash (org.apache.hadoop.hive.ql.udf.generic.GenericUDFHash)3 HiveKey (org.apache.hadoop.hive.ql.io.HiveKey)2 ObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector)2 IntObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.primitive.IntObjectInspector)2 PrimitiveObjectInspectorFactory.javaBooleanObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.javaBooleanObjectInspector)2 PrimitiveObjectInspectorFactory.javaLongObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.javaLongObjectInspector)2 PrimitiveObjectInspectorFactory.javaStringObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.javaStringObjectInspector)2 ArrayList (java.util.ArrayList)1 HiveVarchar (org.apache.hadoop.hive.common.type.HiveVarchar)1 ColumnInfo (org.apache.hadoop.hive.ql.exec.ColumnInfo)1 HiveException (org.apache.hadoop.hive.ql.metadata.HiveException)1 ExprNodeColumnDesc (org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc)1 ExprNodeConstantDesc (org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc)1 ExprNodeDesc (org.apache.hadoop.hive.ql.plan.ExprNodeDesc)1 ExprNodeGenericFuncDesc (org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc)1 GenericUDF (org.apache.hadoop.hive.ql.udf.generic.GenericUDF)1 DeferredJavaObject (org.apache.hadoop.hive.ql.udf.generic.GenericUDF.DeferredJavaObject)1 DeferredObject (org.apache.hadoop.hive.ql.udf.generic.GenericUDF.DeferredObject)1 PrimitiveObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector)1 JavaHiveVarcharObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.primitive.JavaHiveVarcharObjectInspector)1