use of org.apache.hadoop.hive.common.type.DataTypePhysicalVariation in project hive by apache.
the class VectorizationContext method checkExprNodeDescForDecimal64.
/**
 * Determines whether the given expression tree passes the Decimal64 checks.
 *
 * <ul>
 *   <li>A column passes when its physical variation is
 *       {@link DataTypePhysicalVariation#DECIMAL_64}.</li>
 *   <li>A generic function passes when its return type passes
 *       {@code checkTypeInfoForDecimal64}, its UDF class carries the
 *       {@link VectorizedExpressionsSupportDecimal64} annotation, and every child passes.</li>
 *   <li>A stand-alone constant passes when its type passes
 *       {@code checkTypeInfoForDecimal64}.</li>
 *   <li>Any other kind of expression fails.</li>
 * </ul>
 *
 * @param exprNodeDesc the expression to examine
 * @return {@code true} if the whole tree passes the checks above
 * @throws HiveException propagated from the helper methods or the recursive call
 */
private boolean checkExprNodeDescForDecimal64(ExprNodeDesc exprNodeDesc) throws HiveException {
  if (exprNodeDesc instanceof ExprNodeColumnDesc) {
    final int columnIndex = getInputColumnIndex((ExprNodeColumnDesc) exprNodeDesc);
    return DataTypePhysicalVariation.DECIMAL_64 == getDataTypePhysicalVariation(columnIndex);
  }

  if (exprNodeDesc instanceof ExprNodeGenericFuncDesc) {
    // The function result itself must qualify before anything else is inspected.
    final TypeInfo resultTypeInfo = exprNodeDesc.getTypeInfo();
    if (!checkTypeInfoForDecimal64(resultTypeInfo)) {
      return false;
    }
    final DecimalTypeInfo resultDecimalTypeInfo = (DecimalTypeInfo) resultTypeInfo;

    // A class-level annotation on the UDF says whether its vectorized expressions
    // support Decimal64.
    final GenericUDF genericUdf = ((ExprNodeGenericFuncDesc) exprNodeDesc).getGenericUDF();
    final Class<?> udfClass = genericUdf.getClass();
    if (AnnotationUtils.getAnnotation(udfClass, VectorizedExpressionsSupportDecimal64.class) == null) {
      return false;
    }

    // Every child must qualify as well.
    for (ExprNodeDesc child : exprNodeDesc.getChildren()) {
      if (child instanceof ExprNodeConstantDesc) {
        // Constant children are judged by the decimal type derived relative to the return type.
        final DecimalTypeInfo constantDecimalTypeInfo =
            decimalTypeFromCastToDecimal(child, resultDecimalTypeInfo);
        if (constantDecimalTypeInfo == null || !checkTypeInfoForDecimal64(constantDecimalTypeInfo)) {
          return false;
        }
      } else if (!checkExprNodeDescForDecimal64(child)) {
        // Non-constant children are checked recursively.
        return false;
      }
    }
    return true;
  }

  return (exprNodeDesc instanceof ExprNodeConstantDesc)
      && checkTypeInfoForDecimal64(exprNodeDesc.getTypeInfo());
}
use of org.apache.hadoop.hive.common.type.DataTypePhysicalVariation in project hive by apache.
the class VectorizationContext method getIdentityExpression.
/**
 * Builds an {@link IdentityExpression} over the first entry of {@code childExprList}.
 *
 * Used as a fast path for operations that don't modify their input, like unary +
 * and casting boolean to long. IdentityExpression and its children are always
 * projections.
 *
 * @param childExprList the operands; only element 0 is consulted
 * @return an identity expression whose input and output type information both mirror the
 *         operand's type and physical variation
 * @throws HiveException if the operand is neither a generic function nor a column reference
 */
private VectorExpression getIdentityExpression(List<ExprNodeDesc> childExprList) throws HiveException {
  final ExprNodeDesc operand = childExprList.get(0);

  final VectorExpression operandExpression;
  final int sourceColumnNum;
  final TypeInfo sourceTypeInfo;
  final DataTypePhysicalVariation sourcePhysicalVariation;

  if (operand instanceof ExprNodeGenericFuncDesc) {
    // Vectorize the function first; the identity then reads that expression's output column.
    operandExpression = getVectorExpression(operand);
    sourceColumnNum = operandExpression.getOutputColumnNum();
    sourceTypeInfo = operandExpression.getOutputTypeInfo();
    sourcePhysicalVariation = operandExpression.getOutputDataTypePhysicalVariation();
  } else if (operand instanceof ExprNodeColumnDesc) {
    // Plain column: no child expression is needed.
    final ExprNodeColumnDesc columnDesc = (ExprNodeColumnDesc) operand;
    operandExpression = null;
    sourceColumnNum = getInputColumnIndex(columnDesc.getColumn());
    sourceTypeInfo = columnDesc.getTypeInfo();
    // CONSIDER: Validation of type information
    sourcePhysicalVariation = getDataTypePhysicalVariation(sourceColumnNum);
  } else {
    throw new HiveException("Expression not supported: " + operand);
  }

  final VectorExpression identity = new IdentityExpression(sourceColumnNum);
  if (operandExpression != null) {
    identity.setChildExpressions(new VectorExpression[] { operandExpression });
  }
  identity.setInputTypeInfos(sourceTypeInfo);
  identity.setInputDataTypePhysicalVariations(sourcePhysicalVariation);
  identity.setOutputTypeInfo(sourceTypeInfo);
  identity.setOutputDataTypePhysicalVariation(sourcePhysicalVariation);
  return identity;
}
use of org.apache.hadoop.hive.common.type.DataTypePhysicalVariation in project hive by apache.
the class VectorizationContext method getColumnVectorExpression.
/**
 * Creates the vector expression for a bare column reference.
 *
 * <p>In FILTER mode the column is evaluated as a boolean: a boolean column is wrapped
 * directly in {@link SelectColumnIsTrue}; any other column is first converted to boolean
 * (via {@code getCastToBoolean}, falling back to the {@code UDFToBoolean} UDF) and the
 * conversion's output column is what gets filtered. In PROJECTION mode the column is
 * wrapped in an {@link IdentityExpression} carrying the column's type and physical
 * variation on both input and output.
 *
 * @param exprDesc the column reference
 * @param mode whether the column is used as a filter or as a projection
 * @return the vector expression, or {@code null} if {@code mode} is neither FILTER nor
 *         PROJECTION
 * @throws HiveException if a non-boolean column cannot be converted to boolean in FILTER mode
 */
private VectorExpression getColumnVectorExpression(ExprNodeColumnDesc exprDesc, VectorExpressionDescriptor.Mode mode) throws HiveException {
  int columnNum = getInputColumnIndex(exprDesc.getColumn());
  VectorExpression expr = null;
  switch (mode) {
  case FILTER:
    {
      // Evaluate the column as a boolean, converting if necessary.
      TypeInfo typeInfo = exprDesc.getTypeInfo();
      if (typeInfo.getCategory() == Category.PRIMITIVE && ((PrimitiveTypeInfo) typeInfo).getPrimitiveCategory() == PrimitiveCategory.BOOLEAN) {
        expr = new SelectColumnIsTrue(columnNum);
        // Record the input type information here too, so this filter carries the same
        // metadata as the converted path below.
        expr.setInputTypeInfos(typeInfo);
        expr.setInputDataTypePhysicalVariations(DataTypePhysicalVariation.NONE);
      } else {
        // Ok, we need to convert.
        ArrayList<ExprNodeDesc> exprAsList = new ArrayList<ExprNodeDesc>(1);
        exprAsList.add(exprDesc);
        // First try our cast method that will handle a few special cases.
        VectorExpression castToBooleanExpr = getCastToBoolean(exprAsList);
        if (castToBooleanExpr == null) {
          // Ok, try the UDF.
          castToBooleanExpr = getVectorExpressionForUdf(null, UDFToBoolean.class, exprAsList, VectorExpressionDescriptor.Mode.PROJECTION, TypeInfoFactory.booleanTypeInfo);
          if (castToBooleanExpr == null) {
            throw new HiveException("Cannot vectorize converting expression " + exprDesc.getExprString() + " to boolean");
          }
        }
        // Filter on the conversion's output column, keeping the conversion as a child.
        final int outputColumnNum = castToBooleanExpr.getOutputColumnNum();
        expr = new SelectColumnIsTrue(outputColumnNum);
        expr.setChildExpressions(new VectorExpression[] { castToBooleanExpr });
        expr.setInputTypeInfos(castToBooleanExpr.getOutputTypeInfo());
        expr.setInputDataTypePhysicalVariations(DataTypePhysicalVariation.NONE);
      }
    }
    break;
  case PROJECTION:
    {
      expr = new IdentityExpression(columnNum);
      TypeInfo identityTypeInfo = exprDesc.getTypeInfo();
      DataTypePhysicalVariation identityDataTypePhysicalVariation = getDataTypePhysicalVariation(columnNum);
      expr.setInputTypeInfos(identityTypeInfo);
      expr.setInputDataTypePhysicalVariations(identityDataTypePhysicalVariation);
      expr.setOutputTypeInfo(identityTypeInfo);
      expr.setOutputDataTypePhysicalVariation(identityDataTypePhysicalVariation);
    }
    break;
  }
  return expr;
}
use of org.apache.hadoop.hive.common.type.DataTypePhysicalVariation in project hive by apache.
the class VectorizedRowBatchCtx method createVectorizedRowBatch.
/**
 * Creates a vectorized row batch and its column vectors.
 *
 * <p>The batch has one slot per row column plus one per scratch column. When
 * {@code dataColumnNums} is {@code null}, a vector is created for every row column.
 * Otherwise vectors are created only for the listed data columns, for all partition
 * columns, and for those virtual columns that {@code isVirtualColumnNeeded} accepts.
 * Scratch column vectors are always created, after the row columns.
 *
 * @return the new batch, after {@code setPartitionInfo} and {@code reset()} have been applied
 */
public VectorizedRowBatch createVectorizedRowBatch() {
  final int rowColumnCount = rowColumnTypeInfos.length;
  final int scratchColumnCount = scratchColumnTypeNames.length;
  VectorizedRowBatch batch = new VectorizedRowBatch(rowColumnCount + scratchColumnCount);

  if (dataColumnNums != null) {
    // Create only needed/included data columns.
    for (int columnNum : dataColumnNums) {
      Preconditions.checkState(columnNum < rowColumnCount);
      batch.cols[columnNum] = createColumnVectorFromRowColumnTypeInfos(columnNum);
    }

    // Always create partition columns; they sit right after the data columns.
    final int firstVirtualColumnNum = dataColumnCount + partitionColumnCount;
    for (int col = dataColumnCount; col < firstVirtualColumnNum; col++) {
      batch.cols[col] = VectorizedBatchUtil.createColumnVector(rowColumnTypeInfos[col]);
    }

    // Virtual columns follow the partition columns; create only the ones that are needed.
    final int virtualColumnLimit = firstVirtualColumnNum + virtualColumnCount;
    for (int col = firstVirtualColumnNum; col < virtualColumnLimit; col++) {
      if (isVirtualColumnNeeded(rowColumnNames[col])) {
        batch.cols[col] = VectorizedBatchUtil.createColumnVector(rowColumnTypeInfos[col]);
      }
    }
  } else {
    // All data and partition columns.
    for (int col = 0; col < rowColumnCount; col++) {
      batch.cols[col] = createColumnVectorFromRowColumnTypeInfos(col);
    }
  }

  // Scratch columns occupy the slots after the row columns.
  for (int scratch = 0; scratch < scratchColumnCount; scratch++) {
    batch.cols[rowColumnCount + scratch] = VectorizedBatchUtil.createColumnVector(
        scratchColumnTypeNames[scratch], scratchDataTypePhysicalVariations[scratch]);
  }

  // UNDONE: Also remember virtualColumnCount...
  batch.setPartitionInfo(dataColumnCount, partitionColumnCount);
  batch.reset();
  return batch;
}
use of org.apache.hadoop.hive.common.type.DataTypePhysicalVariation in project hive by apache.
the class VectorizationContext method createVectorExpression.
/**
 * Instantiates {@code vectorClass} with constructor arguments derived from the child
 * expressions, and records the children's type information on the result.
 *
 * <p>Each child contributes one argument:
 * <ul>
 *   <li>generic function or field expression: vectorized recursively; the argument is its
 *       output column number;</li>
 *   <li>column: the argument is the input column index, except that a decimal column whose
 *       physical variation is DECIMAL_64 is first routed through a Decimal64-to-decimal
 *       conversion and the conversion's output column is used instead;</li>
 *   <li>constant: the scalar value, or a constant vector expression when the scalar is
 *       {@code null};</li>
 *   <li>dynamic value: the dynamic value object.</li>
 * </ul>
 *
 * @param vectorClass the vector expression class to instantiate
 * @param childExpr the child expressions; {@code null} is treated as no children
 * @param childrenMode the mode used when vectorizing children
 * @param returnType the result type handed to {@code instantiateExpression}
 * @return the instantiated expression with input type information and children attached
 * @throws HiveException if a child is of an unsupported kind or the expression cannot be
 *         instantiated
 */
private VectorExpression createVectorExpression(Class<?> vectorClass, List<ExprNodeDesc> childExpr, VectorExpressionDescriptor.Mode childrenMode, TypeInfo returnType) throws HiveException {
  int numChildren = childExpr == null ? 0 : childExpr.size();
  TypeInfo[] inputTypeInfos = new TypeInfo[numChildren];
  DataTypePhysicalVariation[] inputDataTypePhysicalVariations = new DataTypePhysicalVariation[numChildren];
  List<VectorExpression> children = new ArrayList<VectorExpression>();
  Object[] arguments = new Object[numChildren];
  for (int i = 0; i < numChildren; i++) {
    ExprNodeDesc child = childExpr.get(i);
    TypeInfo childTypeInfo = child.getTypeInfo();
    inputTypeInfos[i] = childTypeInfo;
    // Assume no physical variation; updated below when a child expression reports one.
    inputDataTypePhysicalVariations[i] = DataTypePhysicalVariation.NONE;
    if ((child instanceof ExprNodeGenericFuncDesc) || (child instanceof ExprNodeFieldDesc)) {
      VectorExpression vChild = getVectorExpression(child, childrenMode);
      children.add(vChild);
      arguments[i] = vChild.getOutputColumnNum();
      // Update.
      inputDataTypePhysicalVariations[i] = vChild.getOutputDataTypePhysicalVariation();
    } else if (child instanceof ExprNodeColumnDesc) {
      int colIndex = getInputColumnIndex((ExprNodeColumnDesc) child);
      if (childTypeInfo instanceof DecimalTypeInfo) {
        // In this method, we must only process non-Decimal64 column vectors.
        // Convert Decimal64 columns to regular decimal.
        // (An enum == comparison is already false for null, so no separate null check.)
        DataTypePhysicalVariation dataTypePhysicalVariation = getDataTypePhysicalVariation(colIndex);
        if (dataTypePhysicalVariation == DataTypePhysicalVariation.DECIMAL_64) {
          // FUTURE: Can we reuse this conversion?
          VectorExpression vChild = createDecimal64ToDecimalConversion(colIndex, childTypeInfo);
          children.add(vChild);
          arguments[i] = vChild.getOutputColumnNum();
          // Update.
          inputDataTypePhysicalVariations[i] = vChild.getOutputDataTypePhysicalVariation();
          continue;
        }
      }
      if (childrenMode == VectorExpressionDescriptor.Mode.FILTER) {
        // In filter mode, the column must be a boolean
        SelectColumnIsTrue selectColumnIsTrue = new SelectColumnIsTrue(colIndex);
        selectColumnIsTrue.setInputTypeInfos(childTypeInfo);
        selectColumnIsTrue.setInputDataTypePhysicalVariations(DataTypePhysicalVariation.NONE);
        children.add(selectColumnIsTrue);
      }
      arguments[i] = colIndex;
    } else if (child instanceof ExprNodeConstantDesc) {
      Object scalarValue = getVectorTypeScalarValue((ExprNodeConstantDesc) child);
      arguments[i] = (null == scalarValue) ? getConstantVectorExpression(null, child.getTypeInfo(), childrenMode) : scalarValue;
    } else if (child instanceof ExprNodeDynamicValueDesc) {
      arguments[i] = ((ExprNodeDynamicValueDesc) child).getDynamicValue();
    } else {
      throw new HiveException("Cannot handle expression type: " + child.getClass().getSimpleName());
    }
  }
  VectorExpression vectorExpression = instantiateExpression(vectorClass, returnType, DataTypePhysicalVariation.NONE, arguments);
  if (vectorExpression == null) {
    handleCouldNotInstantiateVectorExpression(vectorClass, returnType, DataTypePhysicalVariation.NONE, arguments);
    // The handler is expected to report the failure by throwing. Should it ever return
    // normally, fail with a descriptive HiveException instead of dereferencing null below.
    throw new HiveException("Could not instantiate vector expression " + vectorClass);
  }
  vectorExpression.setInputTypeInfos(inputTypeInfos);
  vectorExpression.setInputDataTypePhysicalVariations(inputDataTypePhysicalVariations);
  if (!children.isEmpty()) {
    vectorExpression.setChildExpressions(children.toArray(new VectorExpression[0]));
  }
  // Hand each child's output column back to the output column manager.
  for (VectorExpression ve : children) {
    ocm.freeOutputColumn(ve.getOutputColumnNum());
  }
  return vectorExpression;
}
Aggregations