Use of org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression in the Apache Hive project.
Class VectorizationContext, method getGenericUdfVectorExpression.
/**
 * Builds the vector expression for a generic UDF call.
 *
 * <p>Casts on constant children are evaluated first. The UDF is then offered to the
 * special-case handlers; if none of them produces an expression, the general
 * class-based lookup is used.
 *
 * @param udf the generic UDF being vectorized
 * @param childExpr the argument expressions of the UDF call
 * @param mode whether a filter or a projection expression is requested
 * @param returnType the declared result type of the UDF call
 * @return the special-case expression if one was produced, otherwise whatever the
 *         general lookup returns
 * @throws HiveException propagated from the helper methods
 */
private VectorExpression getGenericUdfVectorExpression(GenericUDF udf, List<ExprNodeDesc> childExpr, VectorExpressionDescriptor.Mode mode, TypeInfo returnType) throws HiveException {
  // Every later step works on the children with constant casts already evaluated.
  final List<ExprNodeDesc> children = evaluateCastOnConstants(childExpr);

  final VectorExpression specialCase = lookupSpecialCaseUdfExpression(udf, children, mode, returnType);
  if (specialCase != null) {
    return specialCase;
  }

  // General lookup. For a bridged UDF the wrapped UDF class is looked up and no
  // GenericUDF instance is handed over; otherwise the UDF's own class and the
  // instance itself are used.
  final boolean bridged = udf instanceof GenericUDFBridge;
  final Class<?> lookupClass = bridged ? ((GenericUDFBridge) udf).getUdfClass() : udf.getClass();
  final GenericUDF lookupUdf = bridged ? null : udf;
  return getVectorExpressionForUdf(lookupUdf, lookupClass, children, mode, returnType);
}

/**
 * Dispatches to the special-case handler matching the runtime type of {@code udf}.
 *
 * @return the handler's result, or {@code null} when no special case matches or the
 *         matching handler cannot handle the call
 */
private VectorExpression lookupSpecialCaseUdfExpression(GenericUDF udf, List<ExprNodeDesc> children, VectorExpressionDescriptor.Mode mode, TypeInfo returnType) throws HiveException {
  if (udf instanceof GenericUDFBetween) {
    return getBetweenExpression(children, mode, returnType);
  }
  if (udf instanceof GenericUDFIn) {
    return getInExpression(children, mode, returnType);
  }
  if (udf instanceof GenericUDFIf) {
    return getIfExpression((GenericUDFIf) udf, children, mode, returnType);
  }
  if (udf instanceof GenericUDFWhen) {
    return getWhenExpression(children, mode, returnType);
  }
  if (udf instanceof GenericUDFOPPositive) {
    return getIdentityExpression(children);
  }
  if (udf instanceof GenericUDFCoalesce) {
    return getCoalesceExpression(children, mode, returnType);
  }
  if (udf instanceof GenericUDFElt) {
    // Elt gets its own handler because it accepts a variable number of arguments.
    return getEltExpression(children, returnType);
  }
  if (udf instanceof GenericUDFGrouping) {
    return getGroupingExpression((GenericUDFGrouping) udf, children, returnType);
  }
  if (udf instanceof GenericUDFBridge) {
    return getGenericUDFBridgeVectorExpression((GenericUDFBridge) udf, children, mode, returnType);
  }
  if (udf instanceof GenericUDFToString) {
    return getCastToString(children, returnType);
  }
  if (udf instanceof GenericUDFToDecimal) {
    return getCastToDecimal(children, returnType);
  }
  if (udf instanceof GenericUDFToChar) {
    return getCastToChar(children, returnType);
  }
  if (udf instanceof GenericUDFToVarchar) {
    return getCastToVarChar(children, returnType);
  }
  if (udf instanceof GenericUDFToBinary) {
    return getCastToBinary(children, returnType);
  }
  if (udf instanceof GenericUDFTimestamp) {
    return getCastToTimestamp((GenericUDFTimestamp) udf, children, mode, returnType);
  }
  if (udf instanceof GenericUDFDate || udf instanceof GenericUDFToDate) {
    return getIdentityForDateToDate(children, returnType);
  }
  if (udf instanceof GenericUDFBucketNumber) {
    // Built directly: allocate an output column and use the return type for both
    // the input and output type info.
    final VectorExpression bucketExpr = new BucketNumExpression(ocm.allocateOutputColumn(returnType));
    bucketExpr.setInputTypeInfos(returnType);
    bucketExpr.setOutputTypeInfo(returnType);
    return bucketExpr;
  }
  if (udf instanceof GenericUDFCastFormat) {
    return getCastWithFormat(udf, children, returnType);
  }
  return null;
}
Use of org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression in the Apache Hive project.
Class VectorizationContext, method getColumnVectorExpression.
/**
 * Creates the vector expression for a plain column reference.
 *
 * @param exprDesc the column reference; its column name is resolved to an input
 *        column index
 * @param mode FILTER yields a filter on the boolean column, PROJECTION yields an
 *        identity expression over the column
 * @return the expression for the requested mode
 * @throws HiveException propagated from the helper methods
 * @throws RuntimeException if {@code mode} is neither FILTER nor PROJECTION
 */
private VectorExpression getColumnVectorExpression(ExprNodeColumnDesc exprDesc, VectorExpressionDescriptor.Mode mode) throws HiveException {
  final int inputColumn = getInputColumnIndex(exprDesc.getColumn());

  switch (mode) {
    case FILTER:
      return getFilterOnBooleanColumnExpression(exprDesc, inputColumn);

    case PROJECTION: {
      // The identity expression reads and writes the same column, so the column's
      // type info and physical variation are used for both input and output.
      final VectorExpression identity = new IdentityExpression(inputColumn);
      final TypeInfo columnType = exprDesc.getTypeInfo();
      final DataTypePhysicalVariation columnVariation = getDataTypePhysicalVariation(inputColumn);
      identity.setInputTypeInfos(columnType);
      identity.setInputDataTypePhysicalVariations(columnVariation);
      identity.setOutputTypeInfo(columnType);
      identity.setOutputDataTypePhysicalVariation(columnVariation);
      return identity;
    }

    default:
      throw new RuntimeException("Unexpected mode " + mode);
  }
}
Use of org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression in the Apache Hive project.
Class VectorizationContext, method getGenericUDFBridgeVectorExpression.
/**
 * Special handling for bridged UDFs that the regular descriptor-based lookup
 * cannot vectorize.
 *
 * <p>The wrapped UDF class selects one of the cast builders (integer family,
 * boolean, floating-point family). If no builder applies, or the chosen builder
 * returns {@code null}, the custom-UDF fallback below is considered.
 *
 * @param udf the bridge whose wrapped UDF class drives the dispatch
 * @param childExpr the argument expressions of the call
 * @param mode filter or projection; forwarded to the boolean cast and the fallback
 * @param returnType declared result type; forwarded to the floating-point cast
 * @return the resulting expression, or {@code null} if none could be built
 * @throws HiveException propagated from the helper methods
 */
private VectorExpression getGenericUDFBridgeVectorExpression(GenericUDFBridge udf, List<ExprNodeDesc> childExpr, VectorExpressionDescriptor.Mode mode, TypeInfo returnType) throws HiveException {
  final Class<? extends UDF> bridgedClass = udf.getUdfClass();

  VectorExpression castExpr;
  if (isCastToIntFamily(bridgedClass)) {
    final PrimitiveCategory integerCategory = getAnyIntegerPrimitiveCategoryFromUdfClass(bridgedClass);
    castExpr = getCastToLongExpression(childExpr, integerCategory);
  } else if (isCastToBoolean(bridgedClass)) {
    castExpr = getCastToBooleanExpression(childExpr, mode);
  } else if (isCastToFloatFamily(bridgedClass)) {
    castExpr = getCastToDoubleExpression(bridgedClass, childExpr, returnType);
  } else {
    castExpr = null;
  }
  if (castExpr != null) {
    return castExpr;
  }

  // NOTE(review): childExpr is declared as List<ExprNodeDesc>, so this test only
  // succeeds when the list object itself is also an ExprNodeGenericFuncDesc.
  // Confirm whether the enclosing function descriptor was meant to be checked here.
  if (childExpr instanceof ExprNodeGenericFuncDesc) {
    return getCustomUDFExpression((ExprNodeGenericFuncDesc) childExpr, mode);
  }
  return null;
}
Use of org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression in the Apache Hive project.
Class VectorizationContext, method getCastToBooleanExpression.
/**
 * Builds the vector expression for a cast of the first child to boolean.
 *
 * <p>A constant child is only supported when its value is {@code null}, in which
 * case a null boolean constant expression is returned; any other constant yields
 * {@code null}. For non-constant children a projection cast is created, and in
 * filter mode that projection is wrapped in a {@code SelectColumnIsTrue} filter
 * on its output column.
 *
 * @param childExpr the cast arguments; only the first element is inspected here
 * @param mode filter or projection
 * @return the cast expression (wrapped in a filter when not in projection mode),
 *         or {@code null} if no expression could be created
 * @throws HiveException propagated from the helper methods
 */
private VectorExpression getCastToBooleanExpression(List<ExprNodeDesc> childExpr, VectorExpressionDescriptor.Mode mode) throws HiveException {
  final ExprNodeDesc input = childExpr.get(0);
  final String inputTypeName = input.getTypeInfo().toString();

  if (input instanceof ExprNodeConstantDesc) {
    final Object constantValue = ((ExprNodeConstantDesc) input).getValue();
    if (constantValue == null) {
      return getConstantVectorExpression(null, TypeInfoFactory.booleanTypeInfo, mode);
    }
    // Non-null constants are not handled here.
    // Family of related JIRAs: HIVE-7421, HIVE-7422, and HIVE-7424.
    return null;
  }

  // The cast itself is always built as a projection. String-family inputs use the
  // dedicated CastStringToBoolean class; everything else goes through the
  // UDFToBoolean lookup.
  final VectorExpression projection;
  if (isStringFamily(inputTypeName)) {
    projection = createVectorExpression(CastStringToBoolean.class, childExpr, VectorExpressionDescriptor.Mode.PROJECTION, TypeInfoFactory.booleanTypeInfo, DataTypePhysicalVariation.NONE);
  } else {
    projection = getVectorExpressionForUdf(null, UDFToBoolean.class, childExpr, VectorExpressionDescriptor.Mode.PROJECTION, TypeInfoFactory.booleanTypeInfo);
  }

  final boolean projectionRequested = mode == VectorExpressionDescriptor.Mode.PROJECTION;
  if (projection == null || projectionRequested) {
    return projection;
  }

  // Otherwise wrap the projection: the filter selects on the projection's output
  // column and evaluates the projection as its child.
  final SelectColumnIsTrue filter = new SelectColumnIsTrue(projection.getOutputColumnNum());
  filter.setChildExpressions(new VectorExpression[] { projection });
  filter.setInputTypeInfos(projection.getOutputTypeInfo());
  filter.setInputDataTypePhysicalVariations(DataTypePhysicalVariation.NONE);
  return filter;
}
Use of org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression in the Apache Hive project.
Class VectorMapJoinOperator, method process.
/**
 * Processes one vectorized batch.
 *
 * <p>The big-table filter, big-table value and key expressions are evaluated on
 * the batch in that order, the key wrappers are computed, and then the superclass
 * {@code process} is invoked once for each index below {@code inBatch.size}.
 *
 * @param row the incoming batch; must be a {@code VectorizedRowBatch}
 * @param tag the input tag, stored on this operator and forwarded to the superclass
 * @throws HiveException propagated from expression evaluation or the superclass
 */
@Override
public void process(Object row, int tag) throws HiveException {
  final VectorizedRowBatch batch = (VectorizedRowBatch) row;

  // Preparation for hybrid grace hash join
  this.tag = tag;
  if (scratchBatch == null) {
    // Created lazily from the first batch seen.
    scratchBatch = VectorizedBatchUtil.makeLike(batch);
  }

  // Filter and value expressions are optional; key expressions are always iterated.
  if (bigTableFilterExpressions != null) {
    for (VectorExpression filterExpr : bigTableFilterExpressions) {
      filterExpr.evaluate(batch);
    }
  }
  if (bigTableValueExpressions != null) {
    for (VectorExpression valueExpr : bigTableValueExpressions) {
      valueExpr.evaluate(batch);
    }
  }
  for (VectorExpression keyExpr : keyExpressions) {
    keyExpr.evaluate(batch);
  }

  keyWrapperBatch.evaluateBatch(batch);
  keyValues = keyWrapperBatch.getVectorHashKeyWrappers();

  // The loop counter is the batchIndex field, not a local.
  // NOTE(review): presumably the superclass call reads batchIndex/keyValues to
  // pick the current row — confirm against the superclass.
  batchIndex = 0;
  while (batchIndex < batch.size) {
    super.process(row, tag);
    ++batchIndex;
  }

  // Reset both fields to invalid values so that any use outside the loop above
  // fails with NPE/OutOfBounds errors.
  batchIndex = -1;
  keyValues = null;
}
Aggregations