Use of org.apache.hadoop.hive.ql.exec.vector.expressions.BucketNumExpression in project hive by apache.
From the class VectorizationContext, method getGenericUdfVectorExpression:
private VectorExpression getGenericUdfVectorExpression(GenericUDF udf,
    List<ExprNodeDesc> childExpr, VectorExpressionDescriptor.Mode mode, TypeInfo returnType)
    throws HiveException {

  List<ExprNodeDesc> castedChildren = evaluateCastOnConstants(childExpr);
  childExpr = castedChildren;

  // First handle special cases. If one of the special case methods cannot handle it,
  // it returns null.
  VectorExpression ve = null;
  if (udf instanceof GenericUDFBetween) {
    ve = getBetweenExpression(childExpr, mode, returnType);
  } else if (udf instanceof GenericUDFIn) {
    ve = getInExpression(childExpr, mode, returnType);
  } else if (udf instanceof GenericUDFIf) {
    ve = getIfExpression((GenericUDFIf) udf, childExpr, mode, returnType);
  } else if (udf instanceof GenericUDFWhen) {
    ve = getWhenExpression(childExpr, mode, returnType);
  } else if (udf instanceof GenericUDFOPPositive) {
    ve = getIdentityExpression(childExpr);
  } else if (udf instanceof GenericUDFCoalesce) {
    ve = getCoalesceExpression(childExpr, mode, returnType);
  } else if (udf instanceof GenericUDFElt) {
    // Elt is a special case because it can take variable number of arguments.
    ve = getEltExpression(childExpr, returnType);
  } else if (udf instanceof GenericUDFGrouping) {
    ve = getGroupingExpression((GenericUDFGrouping) udf, childExpr, returnType);
  } else if (udf instanceof GenericUDFBridge) {
    ve = getGenericUDFBridgeVectorExpression((GenericUDFBridge) udf, childExpr, mode, returnType);
  } else if (udf instanceof GenericUDFToString) {
    ve = getCastToString(childExpr, returnType);
  } else if (udf instanceof GenericUDFToDecimal) {
    ve = getCastToDecimal(childExpr, returnType);
  } else if (udf instanceof GenericUDFToChar) {
    ve = getCastToChar(childExpr, returnType);
  } else if (udf instanceof GenericUDFToVarchar) {
    ve = getCastToVarChar(childExpr, returnType);
  } else if (udf instanceof GenericUDFToBinary) {
    ve = getCastToBinary(childExpr, returnType);
  } else if (udf instanceof GenericUDFTimestamp) {
    ve = getCastToTimestamp((GenericUDFTimestamp) udf, childExpr, mode, returnType);
  } else if (udf instanceof GenericUDFDate || udf instanceof GenericUDFToDate) {
    ve = getIdentityForDateToDate(childExpr, returnType);
  } else if (udf instanceof GenericUDFBucketNumber) {
    int outCol = ocm.allocateOutputColumn(returnType);
    ve = new BucketNumExpression(outCol);
    ve.setInputTypeInfos(returnType);
    ve.setOutputTypeInfo(returnType);
  } else if (udf instanceof GenericUDFCastFormat) {
    ve = getCastWithFormat(udf, childExpr, returnType);
  }
  if (ve != null) {
    return ve;
  }

  // Now do a general lookup
  Class<?> udfClass = udf.getClass();
  boolean isSubstituted = false;
  if (udf instanceof GenericUDFBridge) {
    udfClass = ((GenericUDFBridge) udf).getUdfClass();
    isSubstituted = true;
  }
  ve = getVectorExpressionForUdf((!isSubstituted ? udf : null), udfClass, castedChildren, mode, returnType);
  return ve;
}
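Note that the GenericUDFBucketNumber branch above is the only one that builds its vector expression inline: it allocates a scratch output column from the output-column manager (ocm) and wires the same type info for input and output. The following is a minimal, hypothetical sketch of how this path could be driven through the public VectorizationContext API; the context name and column name ("demo", "col0") are placeholders, constructor overloads may differ across Hive versions, and the usual org.apache.hadoop.hive.ql imports are assumed.

// Illustrative only: build an expression tree for the _bucket_number virtual column
// and ask a vectorization context to vectorize it.
VectorExpression vectorizeBucketNumber() throws HiveException {
  VectorizationContext vContext =
      new VectorizationContext("demo", Arrays.asList("col0"));
  ExprNodeGenericFuncDesc bucketNumDesc = new ExprNodeGenericFuncDesc(
      TypeInfoFactory.stringTypeInfo,      // _bucket_number is a string-typed virtual column
      new GenericUDFBucketNumber(),
      new ArrayList<ExprNodeDesc>());      // the UDF takes no arguments
  // Routed through getGenericUdfVectorExpression above, this is expected to return
  // a BucketNumExpression bound to a scratch output column.
  return vContext.getVectorExpression(bucketNumDesc, VectorExpressionDescriptor.Mode.PROJECTION);
}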
Use of org.apache.hadoop.hive.ql.exec.vector.expressions.BucketNumExpression in project hive by apache.
From the class VectorReduceSinkObjectHashOperator, method initializeOp:
@Override
protected void initializeOp(Configuration hconf) throws HiveException {
  super.initializeOp(hconf);

  VectorExpression.doTransientInit(reduceSinkBucketExpressions, hconf);
  VectorExpression.doTransientInit(reduceSinkPartitionExpressions, hconf);

  if (!isEmptyKey) {
    // For this variation, we serialize the key without caring if it is a single Long,
    // single String, multi-key, etc.
    keyOutput = new Output();
    keyBinarySortableSerializeWrite.set(keyOutput);
    keyVectorSerializeRow =
        new VectorSerializeRow<BinarySortableSerializeWrite>(keyBinarySortableSerializeWrite);
    keyVectorSerializeRow.init(reduceSinkKeyTypeInfos, reduceSinkKeyColumnMap);
  }

  if (isEmptyBuckets) {
    numBuckets = 0;
  } else {
    numBuckets = conf.getNumBuckets();
    bucketObjectInspectors = getObjectInspectorArray(reduceSinkBucketTypeInfos);
    bucketVectorExtractRow = new VectorExtractRow();
    bucketVectorExtractRow.init(reduceSinkBucketTypeInfos, reduceSinkBucketColumnMap);
    bucketFieldValues = new Object[reduceSinkBucketTypeInfos.length];
  }

  if (isEmptyPartitions) {
    nonPartitionRandom = new Random(12345);
  } else {
    partitionObjectInspectors = getObjectInspectorArray(reduceSinkPartitionTypeInfos);
    partitionVectorExtractRow = new VectorExtractRow();
    partitionVectorExtractRow.init(reduceSinkPartitionTypeInfos, reduceSinkPartitionColumnMap);
    partitionFieldValues = new Object[reduceSinkPartitionTypeInfos.length];
  }

  // Set hashFunc
  hashFunc = getConf().getBucketingVersion() == 2 && !vectorDesc.getIsAcidChange() ?
      ObjectInspectorUtils::getBucketHashCode :
      ObjectInspectorUtils::getBucketHashCodeOld;

  // Set function to evaluate _bucket_number if needed.
  if (reduceSinkKeyExpressions != null) {
    for (VectorExpression ve : reduceSinkKeyExpressions) {
      if (ve instanceof BucketNumExpression) {
        bucketExpr = (BucketNumExpression) ve;
        break;
      }
    }
  }
}
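The hashFunc assignment above picks between two static methods via method references, depending on the table's bucketing version and whether this is an ACID change operation. Below is a minimal sketch of that pattern in isolation; the field type (a java.util.function.BiFunction) and the local flags are assumptions for illustration, and only the two ObjectInspectorUtils methods are taken from the snippet above.

// Sketch of the hashFunc selection pattern; flags are placeholders for
// getConf().getBucketingVersion() == 2 and vectorDesc.getIsAcidChange().
java.util.function.BiFunction<Object[], ObjectInspector[], Integer> hashFunc;
boolean useBucketingVersion2 = true;
boolean isAcidChange = false;
if (useBucketingVersion2 && !isAcidChange) {
  hashFunc = ObjectInspectorUtils::getBucketHashCode;     // hash used by bucketing version 2
} else {
  hashFunc = ObjectInspectorUtils::getBucketHashCodeOld;  // legacy hash (version 1 / ACID change)
}
// Later, per extracted row:
// int hash = hashFunc.apply(fieldValues, fieldObjectInspectors);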
Use of org.apache.hadoop.hive.ql.exec.vector.expressions.BucketNumExpression in project hive by apache.
From the class VectorReduceSinkObjectHashOperator, method process:
@Override
public void process(Object row, int tag) throws HiveException {
  try {
    VectorizedRowBatch batch = (VectorizedRowBatch) row;
    batchCounter++;
    if (batch.size == 0) {
      if (LOG.isDebugEnabled()) {
        LOG.debug(CLASS_NAME + " batch #" + batchCounter + " empty");
      }
      return;
    }
    if (!isKeyInitialized) {
      isKeyInitialized = true;
      if (isEmptyKey) {
        initializeEmptyKey(tag);
      }
    }

    // Perform any key expressions. Results will go into scratch columns.
    if (reduceSinkKeyExpressions != null) {
      for (VectorExpression ve : reduceSinkKeyExpressions) {
        // Handle _bucket_number
        if (ve instanceof BucketNumExpression) {
          // Evaluate per row
          continue;
        }
        ve.evaluate(batch);
      }
    }

    // Perform any value expressions. Results will go into scratch columns.
    if (reduceSinkValueExpressions != null) {
      for (VectorExpression ve : reduceSinkValueExpressions) {
        ve.evaluate(batch);
      }
    }

    // Perform any bucket expressions. Results will go into scratch columns.
    if (reduceSinkBucketExpressions != null) {
      for (VectorExpression ve : reduceSinkBucketExpressions) {
        ve.evaluate(batch);
      }
    }

    // Perform any partition expressions. Results will go into scratch columns.
    if (reduceSinkPartitionExpressions != null) {
      for (VectorExpression ve : reduceSinkPartitionExpressions) {
        ve.evaluate(batch);
      }
    }

    final boolean selectedInUse = batch.selectedInUse;
    int[] selected = batch.selected;
    final int size = batch.size;
    for (int logical = 0; logical < size; logical++) {
      final int batchIndex = (selectedInUse ? selected[logical] : logical);
      int hashCode;
      if (isEmptyPartitions) {
        if (isSingleReducer) {
          // Empty partition, single reducer -> constant hashCode
          hashCode = 0;
        } else {
          // Empty partition, multiple reducers -> random hashCode
          hashCode = nonPartitionRandom.nextInt();
        }
      } else {
        // Compute hashCode from partitions
        partitionVectorExtractRow.extractRow(batch, batchIndex, partitionFieldValues);
        hashCode = hashFunc.apply(partitionFieldValues, partitionObjectInspectors);
      }
      // Compute hashCode from buckets
      if (!isEmptyBuckets) {
        bucketVectorExtractRow.extractRow(batch, batchIndex, bucketFieldValues);
        final int bucketNum = ObjectInspectorUtils.getBucketNumber(
            hashFunc.apply(bucketFieldValues, bucketObjectInspectors), numBuckets);
        if (bucketExpr != null) {
          evaluateBucketExpr(batch, batchIndex, bucketNum);
        }
        hashCode = hashCode * 31 + bucketNum;
      }
      postProcess(batch, batchIndex, tag, hashCode);
    }
  } catch (Exception e) {
    throw new HiveException(e);
  }
}
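The evaluateBucketExpr helper invoked in the row loop above is not part of this snippet. A plausible minimal sketch is shown below, assuming BucketNumExpression exposes per-row setters for the row index and bucket number before each evaluate call (which its per-row handling in the loop above suggests); the setter names are an assumption, not confirmed by the snippet.

// Hypothetical sketch of the helper called per row above. It lets the
// BucketNumExpression write the computed bucket number for exactly one
// row of the batch into its output column.
private void evaluateBucketExpr(VectorizedRowBatch batch, int rowNum, int bucketNum)
    throws HiveException {
  bucketExpr.setRowNum(rowNum);        // which row of the batch to fill (assumed setter)
  bucketExpr.setBucketNum(bucketNum);  // value computed from the bucket hash (assumed setter)
  bucketExpr.evaluate(batch);          // writes the bucket number into the output column
}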