
Example 1 with BucketNumExpression

Use of org.apache.hadoop.hive.ql.exec.vector.expressions.BucketNumExpression in the Apache Hive project.

From the class VectorizationContext, method getGenericUdfVectorExpression:

private VectorExpression getGenericUdfVectorExpression(GenericUDF udf, List<ExprNodeDesc> childExpr, VectorExpressionDescriptor.Mode mode, TypeInfo returnType) throws HiveException {
    List<ExprNodeDesc> castedChildren = evaluateCastOnConstants(childExpr);
    childExpr = castedChildren;
    // First handle special cases.  If one of the special case methods cannot handle it,
    // it returns null.
    VectorExpression ve = null;
    if (udf instanceof GenericUDFBetween) {
        ve = getBetweenExpression(childExpr, mode, returnType);
    } else if (udf instanceof GenericUDFIn) {
        ve = getInExpression(childExpr, mode, returnType);
    } else if (udf instanceof GenericUDFIf) {
        ve = getIfExpression((GenericUDFIf) udf, childExpr, mode, returnType);
    } else if (udf instanceof GenericUDFWhen) {
        ve = getWhenExpression(childExpr, mode, returnType);
    } else if (udf instanceof GenericUDFOPPositive) {
        ve = getIdentityExpression(childExpr);
    } else if (udf instanceof GenericUDFCoalesce) {
        ve = getCoalesceExpression(childExpr, mode, returnType);
    } else if (udf instanceof GenericUDFElt) {
        // Elt is a special case because it can take a variable number of arguments.
        ve = getEltExpression(childExpr, returnType);
    } else if (udf instanceof GenericUDFGrouping) {
        ve = getGroupingExpression((GenericUDFGrouping) udf, childExpr, returnType);
    } else if (udf instanceof GenericUDFBridge) {
        ve = getGenericUDFBridgeVectorExpression((GenericUDFBridge) udf, childExpr, mode, returnType);
    } else if (udf instanceof GenericUDFToString) {
        ve = getCastToString(childExpr, returnType);
    } else if (udf instanceof GenericUDFToDecimal) {
        ve = getCastToDecimal(childExpr, returnType);
    } else if (udf instanceof GenericUDFToChar) {
        ve = getCastToChar(childExpr, returnType);
    } else if (udf instanceof GenericUDFToVarchar) {
        ve = getCastToVarChar(childExpr, returnType);
    } else if (udf instanceof GenericUDFToBinary) {
        ve = getCastToBinary(childExpr, returnType);
    } else if (udf instanceof GenericUDFTimestamp) {
        ve = getCastToTimestamp((GenericUDFTimestamp) udf, childExpr, mode, returnType);
    } else if (udf instanceof GenericUDFDate || udf instanceof GenericUDFToDate) {
        ve = getIdentityForDateToDate(childExpr, returnType);
    } else if (udf instanceof GenericUDFBucketNumber) {
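        // _bucket_number pseudo-column: allocate a scratch output column and emit a
        // BucketNumExpression; the actual bucket number is filled in per row at run time
        // (see the VectorReduceSinkObjectHashOperator examples below).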
        int outCol = ocm.allocateOutputColumn(returnType);
        ve = new BucketNumExpression(outCol);
        ve.setInputTypeInfos(returnType);
        ve.setOutputTypeInfo(returnType);
    } else if (udf instanceof GenericUDFCastFormat) {
        ve = getCastWithFormat(udf, childExpr, returnType);
    }
    if (ve != null) {
        return ve;
    }
    // Now do a general lookup
    Class<?> udfClass = udf.getClass();
    boolean isSubstituted = false;
    if (udf instanceof GenericUDFBridge) {
        udfClass = ((GenericUDFBridge) udf).getUdfClass();
        isSubstituted = true;
    }
    ve = getVectorExpressionForUdf((!isSubstituted ? udf : null), udfClass, castedChildren, mode, returnType);
    return ve;
}
Also used: BucketNumExpression (org.apache.hadoop.hive.ql.exec.vector.expressions.BucketNumExpression), ExprNodeDesc (org.apache.hadoop.hive.ql.plan.ExprNodeDesc), FilterConstantBooleanVectorExpression (org.apache.hadoop.hive.ql.exec.vector.expressions.FilterConstantBooleanVectorExpression), ConstantVectorExpression (org.apache.hadoop.hive.ql.exec.vector.expressions.ConstantVectorExpression), VectorExpression (org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression), DynamicValueVectorExpression (org.apache.hadoop.hive.ql.exec.vector.expressions.DynamicValueVectorExpression)
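
The GenericUDFBucketNumber branch above is one of the hand-written special cases; any UDF that no special case claims falls through to the table-driven lookup at the bottom of the method. A condensed sketch of that dispatch pattern, using hypothetical helper names (trySpecialCases, lookupByDescriptor) that are not part of the Hive API:

// Hypothetical condensed sketch of the special-case-then-general-lookup pattern above.
// trySpecialCases and lookupByDescriptor are invented names, not Hive methods.
VectorExpression resolve(GenericUDF udf, List<ExprNodeDesc> children,
        VectorExpressionDescriptor.Mode mode, TypeInfo returnType) throws HiveException {
    VectorExpression ve = trySpecialCases(udf, children, mode, returnType); // null when no special case matches
    if (ve != null) {
        return ve;
    }
    // Old-style UDFs arrive wrapped in a GenericUDFBridge; unwrap before the lookup.
    Class<?> udfClass = (udf instanceof GenericUDFBridge)
            ? ((GenericUDFBridge) udf).getUdfClass()
            : udf.getClass();
    return lookupByDescriptor(udfClass, children, mode, returnType);
}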

Example 2 with BucketNumExpression

Use of org.apache.hadoop.hive.ql.exec.vector.expressions.BucketNumExpression in the Apache Hive project.

From the class VectorReduceSinkObjectHashOperator, method initializeOp:

@Override
protected void initializeOp(Configuration hconf) throws HiveException {
    super.initializeOp(hconf);
    VectorExpression.doTransientInit(reduceSinkBucketExpressions, hconf);
    VectorExpression.doTransientInit(reduceSinkPartitionExpressions, hconf);
    if (!isEmptyKey) {
        // For this variation, we serialize the key without caring whether it is a
        // single Long, a single String, a multi-column key, etc.
        keyOutput = new Output();
        keyBinarySortableSerializeWrite.set(keyOutput);
        keyVectorSerializeRow = new VectorSerializeRow<BinarySortableSerializeWrite>(keyBinarySortableSerializeWrite);
        keyVectorSerializeRow.init(reduceSinkKeyTypeInfos, reduceSinkKeyColumnMap);
    }
    if (isEmptyBuckets) {
        numBuckets = 0;
    } else {
        numBuckets = conf.getNumBuckets();
        bucketObjectInspectors = getObjectInspectorArray(reduceSinkBucketTypeInfos);
        bucketVectorExtractRow = new VectorExtractRow();
        bucketVectorExtractRow.init(reduceSinkBucketTypeInfos, reduceSinkBucketColumnMap);
        bucketFieldValues = new Object[reduceSinkBucketTypeInfos.length];
    }
    if (isEmptyPartitions) {
        nonPartitionRandom = new Random(12345);
    } else {
        partitionObjectInspectors = getObjectInspectorArray(reduceSinkPartitionTypeInfos);
        partitionVectorExtractRow = new VectorExtractRow();
        partitionVectorExtractRow.init(reduceSinkPartitionTypeInfos, reduceSinkPartitionColumnMap);
        partitionFieldValues = new Object[reduceSinkPartitionTypeInfos.length];
    }
    // Set hashFunc
    hashFunc = (getConf().getBucketingVersion() == 2 && !vectorDesc.getIsAcidChange())
            ? ObjectInspectorUtils::getBucketHashCode
            : ObjectInspectorUtils::getBucketHashCodeOld;
    // Set function to evaluate _bucket_number if needed.
    if (reduceSinkKeyExpressions != null) {
        for (VectorExpression ve : reduceSinkKeyExpressions) {
            if (ve instanceof BucketNumExpression) {
                bucketExpr = (BucketNumExpression) ve;
                break;
            }
        }
    }
}
Also used: Random (java.util.Random), Output (org.apache.hadoop.hive.serde2.ByteStream.Output), BucketNumExpression (org.apache.hadoop.hive.ql.exec.vector.expressions.BucketNumExpression), VectorExpression (org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression), BinarySortableSerializeWrite (org.apache.hadoop.hive.serde2.binarysortable.fast.BinarySortableSerializeWrite), VectorExtractRow (org.apache.hadoop.hive.ql.exec.vector.VectorExtractRow)
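
In initializeOp above, hashFunc is chosen once from Hive's two bucket-hash implementations via method references, then applied per row in process. A hedged sketch of that selection in isolation, assuming hashFunc is assignable to a BiFunction<Object[], ObjectInspector[], Integer> (the field's declared type in Hive may differ):

import java.util.function.BiFunction;

// Hedged sketch: pick the bucket-hash implementation once, then apply it per row.
boolean useVersion2Hash = true; // stand-in for: getConf().getBucketingVersion() == 2 && !vectorDesc.getIsAcidChange()
BiFunction<Object[], ObjectInspector[], Integer> hashFunc = useVersion2Hash
        ? ObjectInspectorUtils::getBucketHashCode      // bucketing version 2 hash
        : ObjectInspectorUtils::getBucketHashCodeOld;  // legacy (version 1) hash
int hashCode = hashFunc.apply(partitionFieldValues, partitionObjectInspectors);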

Example 3 with BucketNumExpression

Use of org.apache.hadoop.hive.ql.exec.vector.expressions.BucketNumExpression in the Apache Hive project.

From the class VectorReduceSinkObjectHashOperator, method process:

@Override
public void process(Object row, int tag) throws HiveException {
    try {
        VectorizedRowBatch batch = (VectorizedRowBatch) row;
        batchCounter++;
        if (batch.size == 0) {
            if (LOG.isDebugEnabled()) {
                LOG.debug(CLASS_NAME + " batch #" + batchCounter + " empty");
            }
            return;
        }
        if (!isKeyInitialized) {
            isKeyInitialized = true;
            if (isEmptyKey) {
                initializeEmptyKey(tag);
            }
        }
        // Perform any key expressions.  Results will go into scratch columns.
        if (reduceSinkKeyExpressions != null) {
            for (VectorExpression ve : reduceSinkKeyExpressions) {
                // _bucket_number cannot be evaluated for the whole batch here;
                // it is handled per row below (see evaluateBucketExpr).
                if (ve instanceof BucketNumExpression) {
                    continue;
                }
                ve.evaluate(batch);
            }
        }
        // Perform any value expressions.  Results will go into scratch columns.
        if (reduceSinkValueExpressions != null) {
            for (VectorExpression ve : reduceSinkValueExpressions) {
                ve.evaluate(batch);
            }
        }
        // Perform any bucket expressions.  Results will go into scratch columns.
        if (reduceSinkBucketExpressions != null) {
            for (VectorExpression ve : reduceSinkBucketExpressions) {
                ve.evaluate(batch);
            }
        }
        // Perform any partition expressions.  Results will go into scratch columns.
        if (reduceSinkPartitionExpressions != null) {
            for (VectorExpression ve : reduceSinkPartitionExpressions) {
                ve.evaluate(batch);
            }
        }
        final boolean selectedInUse = batch.selectedInUse;
        int[] selected = batch.selected;
        final int size = batch.size;
        for (int logical = 0; logical < size; logical++) {
            final int batchIndex = (selectedInUse ? selected[logical] : logical);
            int hashCode;
            if (isEmptyPartitions) {
                if (isSingleReducer) {
                    // Empty partition, single reducer -> constant hashCode
                    hashCode = 0;
                } else {
                    // Empty partition, multiple reducers -> random hashCode
                    hashCode = nonPartitionRandom.nextInt();
                }
            } else {
                // Compute hashCode from partitions
                partitionVectorExtractRow.extractRow(batch, batchIndex, partitionFieldValues);
                hashCode = hashFunc.apply(partitionFieldValues, partitionObjectInspectors);
            }
            // Compute hashCode from buckets
            if (!isEmptyBuckets) {
                bucketVectorExtractRow.extractRow(batch, batchIndex, bucketFieldValues);
                final int bucketNum = ObjectInspectorUtils.getBucketNumber(
                        hashFunc.apply(bucketFieldValues, bucketObjectInspectors), numBuckets);
                if (bucketExpr != null) {
                    evaluateBucketExpr(batch, batchIndex, bucketNum);
                }
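                // Fold the bucket number into the partition hash (31-multiplier mixing).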
                hashCode = hashCode * 31 + bucketNum;
            }
            postProcess(batch, batchIndex, tag, hashCode);
        }
    } catch (Exception e) {
        throw new HiveException(e);
    }
}
Also used: VectorizedRowBatch (org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch), HiveException (org.apache.hadoop.hive.ql.metadata.HiveException), BucketNumExpression (org.apache.hadoop.hive.ql.exec.vector.expressions.BucketNumExpression), VectorExpression (org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression)
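
process above delegates the per-row _bucket_number write to an evaluateBucketExpr helper that this example does not show. A hedged sketch of what such a helper plausibly does; the per-row setters on BucketNumExpression (initBuffer, setRowNum, setBucketNum) are assumptions inferred from how the expression is skipped during batch evaluation and driven row by row:

// Hedged sketch of the helper referenced above; the setter names are assumptions.
private void evaluateBucketExpr(VectorizedRowBatch batch, int rowNum, int bucketNum) throws HiveException {
    bucketExpr.initBuffer();              // reset the expression's output buffer
    bucketExpr.setRowNum(rowNum);         // row of the batch to write to
    bucketExpr.setBucketNum(bucketNum);   // computed bucket number for that row
    bucketExpr.evaluate(batch);           // writes bucketNum into the scratch output column
}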

Aggregations

BucketNumExpression (org.apache.hadoop.hive.ql.exec.vector.expressions.BucketNumExpression): 3
VectorExpression (org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression): 3
Random (java.util.Random): 1
VectorExtractRow (org.apache.hadoop.hive.ql.exec.vector.VectorExtractRow): 1
VectorizedRowBatch (org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch): 1
ConstantVectorExpression (org.apache.hadoop.hive.ql.exec.vector.expressions.ConstantVectorExpression): 1
DynamicValueVectorExpression (org.apache.hadoop.hive.ql.exec.vector.expressions.DynamicValueVectorExpression): 1
FilterConstantBooleanVectorExpression (org.apache.hadoop.hive.ql.exec.vector.expressions.FilterConstantBooleanVectorExpression): 1
HiveException (org.apache.hadoop.hive.ql.metadata.HiveException): 1
ExprNodeDesc (org.apache.hadoop.hive.ql.plan.ExprNodeDesc): 1
Output (org.apache.hadoop.hive.serde2.ByteStream.Output): 1
BinarySortableSerializeWrite (org.apache.hadoop.hive.serde2.binarysortable.fast.BinarySortableSerializeWrite): 1