use of org.apache.hadoop.hive.common.type.DataTypePhysicalVariation in project hive by apache.
the class VectorizationContext method getCastToDecimal.
/**
 * Builds the vector expression that casts the first child expression to a decimal.
 *
 * A constant child is converted up front and returned as a constant vector expression.
 * Otherwise the cast class is picked from the child's type string. A decimal column whose
 * physical variation is DECIMAL_64 gets a Decimal64-to-decimal conversion instead of a cast.
 *
 * @param childExpr  the cast arguments; only element 0 is inspected here
 * @param returnType the decimal type being cast to
 * @return the cast expression, or null when the input type matches none of the handled families
 * @throws HiveException propagated from the helper methods invoked here
 */
private VectorExpression getCastToDecimal(List<ExprNodeDesc> childExpr, TypeInfo returnType) throws HiveException {
  final ExprNodeDesc input = childExpr.get(0);
  final String inputTypeName = input.getTypeString();

  // Constant input: convert the value now and emit a constant vector expression.
  if (input instanceof ExprNodeConstantDesc) {
    final Object rawConstant = ((ExprNodeConstantDesc) input).getValue();
    final HiveDecimal convertedConstant = castConstantToDecimal(rawConstant, input.getTypeInfo());
    return getConstantVectorExpression(convertedConstant, returnType, VectorExpressionDescriptor.Mode.PROJECTION);
  }

  if (isIntFamily(inputTypeName)) {
    return createVectorExpression(CastLongToDecimal.class, childExpr, VectorExpressionDescriptor.Mode.PROJECTION, returnType);
  }

  if (isFloatFamily(inputTypeName)) {
    return createVectorExpression(CastDoubleToDecimal.class, childExpr, VectorExpressionDescriptor.Mode.PROJECTION, returnType);
  }

  if (decimalTypePattern.matcher(inputTypeName).matches()) {
    // Only a direct column reference can be stored as DECIMAL_64; check for that first.
    if (input instanceof ExprNodeColumnDesc) {
      final int inputColumnNum = getInputColumnIndex((ExprNodeColumnDesc) input);
      if (getDataTypePhysicalVariation(inputColumnNum) == DataTypePhysicalVariation.DECIMAL_64) {
        // Do Decimal64 conversion instead.
        return createDecimal64ToDecimalConversion(inputColumnNum, returnType);
      }
    }
    // Every other decimal input goes through the regular decimal-to-decimal cast.
    return createVectorExpression(CastDecimalToDecimal.class, childExpr, VectorExpressionDescriptor.Mode.PROJECTION, returnType);
  }

  if (isStringFamily(inputTypeName)) {
    return createVectorExpression(CastStringToDecimal.class, childExpr, VectorExpressionDescriptor.Mode.PROJECTION, returnType);
  }

  if (inputTypeName.equals("timestamp")) {
    return createVectorExpression(CastTimestampToDecimal.class, childExpr, VectorExpressionDescriptor.Mode.PROJECTION, returnType);
  }

  // No cast is available for this input type.
  return null;
}
use of org.apache.hadoop.hive.common.type.DataTypePhysicalVariation in project hive by apache.
the class VectorizationContext method getCustomUDFExpression.
/**
 * Return vector expression for a custom (i.e. not built-in) UDF.
 *
 * The UDF is wrapped in a VectorUDFAdaptor. Function and dynamic-value children are
 * vectorized in PROJECTION mode and become child expressions of the adaptor; column and
 * constant children are described directly through their argument descriptors.
 *
 * In FILTER mode the adaptor is wrapped in a SelectColumnIsTrue over its output column.
 *
 * @param expr the custom UDF call
 * @param mode PROJECTION or FILTER
 * @return the vector expression, or null when FILTER mode is requested and the UDF result
 *         type is not BOOLEAN
 * @throws HiveException when a child is of an unsupported expression descriptor kind
 */
private VectorExpression getCustomUDFExpression(ExprNodeGenericFuncDesc expr, VectorExpressionDescriptor.Mode mode) throws HiveException {
  final boolean wrapAsFilter = (mode == VectorExpressionDescriptor.Mode.FILTER);
  if (wrapAsFilter) {
    // A filter can only be built on top of a BOOLEAN result.
    final TypeInfo filterTypeInfo = expr.getTypeInfo();
    final boolean yieldsBoolean =
        filterTypeInfo.getCategory() == Category.PRIMITIVE
            && ((PrimitiveTypeInfo) filterTypeInfo).getPrimitiveCategory() == PrimitiveCategory.BOOLEAN;
    if (!yieldsBoolean) {
      return null;
    }
  }

  final List<ExprNodeDesc> udfArgs = expr.getChildren();
  final int argCount = udfArgs.size();

  // One descriptor per argument.
  final VectorUDFArgDesc[] argDescs = new VectorUDFArgDesc[argCount];
  for (int pos = 0; pos < argCount; pos++) {
    argDescs[pos] = new VectorUDFArgDesc();
  }

  // Vectorized children and the batch columns that hold their results.
  final List<VectorExpression> evaluatedChildren = new ArrayList<VectorExpression>();
  final List<Integer> childResultColumns = new ArrayList<Integer>();

  final TypeInfo[] argTypeInfos = new TypeInfo[argCount];
  final DataTypePhysicalVariation[] argPhysicalVariations = new DataTypePhysicalVariation[argCount];

  for (int pos = 0; pos < argCount; pos++) {
    final ExprNodeDesc arg = udfArgs.get(pos);
    argTypeInfos[pos] = arg.getTypeInfo();
    argPhysicalVariations[pos] = DataTypePhysicalVariation.NONE;

    // Classify the argument; functions and dynamic values share the same handling below.
    boolean mustEvaluate = false;
    if (arg instanceof ExprNodeGenericFuncDesc) {
      mustEvaluate = true;
    } else if (arg instanceof ExprNodeColumnDesc) {
      final String columnName = ((ExprNodeColumnDesc) arg).getColumn();
      argDescs[pos].setVariable(getInputColumnIndex(columnName));
    } else if (arg instanceof ExprNodeConstantDesc) {
      // this is a constant (or null)
      argDescs[pos].setConstant((ExprNodeConstantDesc) arg);
    } else if (arg instanceof ExprNodeDynamicValueDesc) {
      mustEvaluate = true;
    } else {
      throw new HiveException("Unable to vectorize custom UDF. Encountered unsupported expr desc : " + arg);
    }

    if (mustEvaluate) {
      final VectorExpression childVE = getVectorExpression(arg, VectorExpressionDescriptor.Mode.PROJECTION);
      evaluatedChildren.add(childVE);
      final int childOutputColumn = childVE.getOutputColumnNum();
      childResultColumns.add(childOutputColumn);
      argDescs[pos].setVariable(childOutputColumn);
    }
  }

  // Allocate the adaptor's output column and build the adaptor itself.
  final TypeInfo outputTypeInfo = expr.getTypeInfo();
  final String outputTypeName = outputTypeInfo.getTypeName();
  final int outputColumnNum = ocm.allocateOutputColumn(expr.getTypeInfo());
  final VectorExpression adaptor = new VectorUDFAdaptor(expr, outputColumnNum, outputTypeName, argDescs);

  // Child expressions stay null when no argument needed evaluation.
  final VectorExpression[] childArray =
      evaluatedChildren.isEmpty() ? null : evaluatedChildren.toArray(new VectorExpression[0]);
  adaptor.setChildExpressions(childArray);
  adaptor.setInputTypeInfos(argTypeInfos);
  adaptor.setInputDataTypePhysicalVariations(argPhysicalVariations);
  adaptor.setOutputTypeInfo(outputTypeInfo);
  adaptor.setOutputDataTypePhysicalVariation(DataTypePhysicalVariation.NONE);

  // Free output columns if inputs have non-leaf expression trees.
  for (Integer childColumn : childResultColumns) {
    ocm.freeOutputColumn(childColumn);
  }

  if (!wrapAsFilter) {
    return adaptor;
  }

  // Filter mode: select the rows for which the adaptor's boolean output is true.
  final SelectColumnIsTrue filterExpr = new SelectColumnIsTrue(outputColumnNum);
  filterExpr.setChildExpressions(new VectorExpression[] { adaptor });
  filterExpr.setInputTypeInfos(adaptor.getOutputTypeInfo());
  filterExpr.setInputDataTypePhysicalVariations(adaptor.getOutputDataTypePhysicalVariation());
  return filterExpr;
}
use of org.apache.hadoop.hive.common.type.DataTypePhysicalVariation in project hive by apache.
the class VectorizationContext method getDecimal64VectorExpressionForUdf.
/**
 * Tries to build a DECIMAL_64 vector expression for a UDF over two decimal arguments.
 *
 * The first two children are described as DECIMAL_64 COLUMN or SCALAR inputs and the
 * matching vector class is looked up for {@code udfClass}. This method returns null
 * (no Decimal64 expression available) when:
 * <ul>
 *   <li>a child is neither a function, a column, nor a constant;</li>
 *   <li>both children are column-like but have different scales;</li>
 *   <li>the second child is a constant and the first did not establish a column scale
 *       (i.e. scalar/scalar);</li>
 *   <li>no vector class is registered for the descriptor;</li>
 *   <li>a constant is null or has a larger scale than the column scale;</li>
 *   <li>instantiation fails and the failure handler returns normally.</li>
 * </ul>
 *
 * @param genericUdf  the UDF instance (not read by this method)
 * @param udfClass    the UDF class used for the vector class lookup
 * @param childExpr   the UDF arguments; elements 0 and 1 are cast to decimal type infos
 * @param numChildren number of arguments to describe and build
 * @param mode        PROJECTION or FILTER
 * @param returnType  the result type of the expression
 * @return the Decimal64 vector expression, or null as described above
 * @throws HiveException propagated from the helper methods invoked here
 */
private VectorExpression getDecimal64VectorExpressionForUdf(GenericUDF genericUdf, Class<?> udfClass, List<ExprNodeDesc> childExpr, int numChildren, VectorExpressionDescriptor.Mode mode, TypeInfo returnType) throws HiveException {
  ExprNodeDesc child1 = childExpr.get(0);
  ExprNodeDesc child2 = childExpr.get(1);
  DecimalTypeInfo decimalTypeInfo1 = (DecimalTypeInfo) child1.getTypeInfo();
  DecimalTypeInfo decimalTypeInfo2 = (DecimalTypeInfo) child2.getTypeInfo();
  DataTypePhysicalVariation dataTypePhysicalVariation1 = DataTypePhysicalVariation.DECIMAL_64;
  DataTypePhysicalVariation dataTypePhysicalVariation2 = DataTypePhysicalVariation.DECIMAL_64;
  final int scale1 = decimalTypeInfo1.scale();
  final int scale2 = decimalTypeInfo2.scale();

  VectorExpressionDescriptor.Builder builder = new VectorExpressionDescriptor.Builder();
  builder.setNumArguments(numChildren);
  builder.setMode(mode);

  // The scale of the first column-like argument; scalars are later serialized at this scale.
  boolean isColumnScaleEstablished = false;
  int columnScale = 0;

  builder.setArgumentType(0, ArgumentType.DECIMAL_64);
  if (child1 instanceof ExprNodeGenericFuncDesc || child1 instanceof ExprNodeColumnDesc) {
    builder.setInputExpressionType(0, InputExpressionType.COLUMN);
    isColumnScaleEstablished = true;
    columnScale = scale1;
  } else if (child1 instanceof ExprNodeConstantDesc) {
    builder.setInputExpressionType(0, InputExpressionType.SCALAR);
  } else {
    // Currently, only functions, columns, and scalars supported.
    return null;
  }

  builder.setArgumentType(1, ArgumentType.DECIMAL_64);
  if (child2 instanceof ExprNodeGenericFuncDesc || child2 instanceof ExprNodeColumnDesc) {
    builder.setInputExpressionType(1, InputExpressionType.COLUMN);
    if (!isColumnScaleEstablished) {
      isColumnScaleEstablished = true;
      columnScale = scale2;
    } else if (columnScale != scale2) {
      // We only support Decimal64 on 2 columns when they have the same scale.
      return null;
    }
  } else if (child2 instanceof ExprNodeConstantDesc) {
    // Cannot have SCALAR, SCALAR.
    if (!isColumnScaleEstablished) {
      return null;
    }
    builder.setInputExpressionType(1, InputExpressionType.SCALAR);
  } else {
    // Currently, only functions, columns, and scalars supported.
    return null;
  }

  VectorExpressionDescriptor.Descriptor descriptor = builder.build();
  Class<?> vectorClass = this.vMap.getVectorExpressionClass(udfClass, descriptor, useCheckedVectorExpressions);
  if (vectorClass == null) {
    return null;
  }

  VectorExpressionDescriptor.Mode childrenMode = getChildrenMode(mode, udfClass);

  /*
   * Custom build arguments.
   */
  List<VectorExpression> children = new ArrayList<VectorExpression>();
  Object[] arguments = new Object[numChildren];
  for (int i = 0; i < numChildren; i++) {
    ExprNodeDesc child = childExpr.get(i);
    if (child instanceof ExprNodeGenericFuncDesc) {
      VectorExpression vChild = getVectorExpression(child, childrenMode);
      children.add(vChild);
      arguments[i] = vChild.getOutputColumnNum();
    } else if (child instanceof ExprNodeColumnDesc) {
      int colIndex = getInputColumnIndex((ExprNodeColumnDesc) child);
      if (childrenMode == VectorExpressionDescriptor.Mode.FILTER) {
        // In filter mode, the column must be a boolean
        children.add(new SelectColumnIsTrue(colIndex));
      }
      arguments[i] = colIndex;
    } else {
      Preconditions.checkState(child instanceof ExprNodeConstantDesc);
      ExprNodeConstantDesc constDesc = (ExprNodeConstantDesc) child;
      HiveDecimal hiveDecimal = (HiveDecimal) constDesc.getValue();
      if (hiveDecimal == null) {
        // A constant descriptor may carry a null value, which cannot be serialized as a
        // Decimal64 scalar; bail out instead of dereferencing null.
        return null;
      }
      if (hiveDecimal.scale() > columnScale) {
        // For now, bail out on decimal constants with larger scale than column scale.
        return null;
      }
      final long decimal64Scalar = new HiveDecimalWritable(hiveDecimal).serialize64(columnScale);
      arguments[i] = decimal64Scalar;
    }
  }

  /*
   * Instantiate Decimal64 vector expression.
   *
   * The instantiateExpression method sets the output column and type information.
   */
  VectorExpression vectorExpression = instantiateExpression(vectorClass, returnType, DataTypePhysicalVariation.DECIMAL_64, arguments);
  if (vectorExpression == null) {
    handleCouldNotInstantiateVectorExpression(vectorClass, returnType, DataTypePhysicalVariation.DECIMAL_64, arguments);
    // NOTE(review): the handler presumably throws; if it ever returns normally, report
    // "no Decimal64 expression" rather than dereferencing the null result below.
    return null;
  }

  vectorExpression.setInputTypeInfos(decimalTypeInfo1, decimalTypeInfo2);
  vectorExpression.setInputDataTypePhysicalVariations(dataTypePhysicalVariation1, dataTypePhysicalVariation2);
  if (!children.isEmpty()) {
    vectorExpression.setChildExpressions(children.toArray(new VectorExpression[0]));
  }
  return vectorExpression;
}
use of org.apache.hadoop.hive.common.type.DataTypePhysicalVariation in project hive by apache.
the class VectorizedRowBatchCtx method createColumnVectorFromRowColumnTypeInfos.
/**
 * Creates the column vector for the row column at the given index.
 *
 * The type comes from {@code rowColumnTypeInfos}; the physical variation comes from
 * {@code rowDataTypePhysicalVariations} when that array is set, and is NONE otherwise.
 *
 * @param columnNum index into the row column arrays
 * @return the column vector produced by VectorizedBatchUtil for that type and variation
 */
private ColumnVector createColumnVectorFromRowColumnTypeInfos(int columnNum) {
  final TypeInfo columnTypeInfo = rowColumnTypeInfos[columnNum];
  final DataTypePhysicalVariation columnVariation =
      (rowDataTypePhysicalVariations == null)
          ? DataTypePhysicalVariation.NONE
          : rowDataTypePhysicalVariations[columnNum];
  return VectorizedBatchUtil.createColumnVector(columnTypeInfo, columnVariation);
}
use of org.apache.hadoop.hive.common.type.DataTypePhysicalVariation in project hive by apache.
the class Vectorizer method debugDisplayVertexInfo.
/**
 * Logs the vectorized row batch context of a vertex at debug level.
 *
 * Emits the row column names, type infos and physical variations, the partition and
 * virtual column counts, and the scratch column type names and physical variations.
 * A missing physical-variation array is logged as "NULL".
 *
 * @param work the work whose vectorized row batch context is displayed
 */
public void debugDisplayVertexInfo(BaseWork work) {
  final VectorizedRowBatchCtx batchCtx = work.getVectorizedRowBatchCtx();

  // Read everything from the context first, then log.
  final String[] rowNames = batchCtx.getRowColumnNames();
  final TypeInfo[] rowTypeInfos = batchCtx.getRowColumnTypeInfos();
  final DataTypePhysicalVariation[] rowVariations = batchCtx.getRowdataTypePhysicalVariations();
  final int partitionCount = batchCtx.getPartitionColumnCount();
  final int virtualCount = batchCtx.getVirtualColumnCount();
  final String[] scratchTypeNames = batchCtx.getScratchColumnTypeNames();
  final DataTypePhysicalVariation[] scratchVariations = batchCtx.getScratchDataTypePhysicalVariations();

  // The variation arrays may be absent; render them explicitly in that case.
  final String rowVariationsText;
  if (rowVariations == null) {
    rowVariationsText = "NULL";
  } else {
    rowVariationsText = Arrays.toString(rowVariations);
  }
  final String scratchVariationsText;
  if (scratchVariations == null) {
    scratchVariationsText = "NULL";
  } else {
    scratchVariationsText = Arrays.toString(scratchVariations);
  }

  LOG.debug("debugDisplayVertexInfo rowColumnNames " + Arrays.toString(rowNames));
  LOG.debug("debugDisplayVertexInfo rowColumnTypeInfos " + Arrays.toString(rowTypeInfos));
  LOG.debug("debugDisplayVertexInfo rowDataTypePhysicalVariations " + rowVariationsText);
  LOG.debug("debugDisplayVertexInfo partitionColumnCount " + partitionCount);
  LOG.debug("debugDisplayVertexInfo virtualColumnCount " + virtualCount);
  LOG.debug("debugDisplayVertexInfo scratchColumnTypeNames " + Arrays.toString(scratchTypeNames));
  LOG.debug("debugDisplayVertexInfo scratchdataTypePhysicalVariations " + scratchVariationsText);
}
Aggregations