Use of org.apache.hadoop.hive.ql.exec.vector.VectorAggregationDesc in project hive by apache.
From the class Vectorizer, method doVectorizeGroupByOperator:
/*
 * NOTE: The VectorGroupByDesc has already been allocated and will be updated here.
 */
private static ImmutablePair<Operator<? extends OperatorDesc>, String> doVectorizeGroupByOperator(
    Operator<? extends OperatorDesc> groupByOp, VectorizationContext vContext,
    VectorGroupByDesc vectorGroupByDesc) throws HiveException {

  GroupByDesc groupByDesc = (GroupByDesc) groupByOp.getConf();
  List<ExprNodeDesc> keysDesc = groupByDesc.getKeys();

  // For now, we don't support group by on DECIMAL_64 keys.
  VectorExpression[] vecKeyExpressions = vContext.getVectorExpressionsUpConvertDecimal64(keysDesc);
  ArrayList<AggregationDesc> aggrDesc = groupByDesc.getAggregators();
  final int size = aggrDesc.size();

  VectorAggregationDesc[] vecAggrDescs = new VectorAggregationDesc[size];
  int[] projectedOutputColumns = new int[size];
  for (int i = 0; i < size; ++i) {
    AggregationDesc aggDesc = aggrDesc.get(i);
    ImmutablePair<VectorAggregationDesc, String> pair = getVectorAggregationDesc(aggDesc, vContext);
    if (pair.left == null) {
      return new ImmutablePair<Operator<? extends OperatorDesc>, String>(null, pair.right);
    }
    vecAggrDescs[i] = pair.left;

    // GroupBy generates a new vectorized row batch...
    projectedOutputColumns[i] = i;
  }
  vectorGroupByDesc.setKeyExpressions(vecKeyExpressions);
  vectorGroupByDesc.setVecAggrDescs(vecAggrDescs);
  vectorGroupByDesc.setProjectedOutputColumns(projectedOutputColumns);

  Operator<GroupByDesc> vectorOp = OperatorFactory.getVectorOperator(
      groupByOp.getCompilationOpContext(), groupByDesc, vContext, vectorGroupByDesc);
  return new ImmutablePair<Operator<? extends OperatorDesc>, String>(vectorOp, null);
}
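Throughout these methods the Vectorizer signals success or failure with an ImmutablePair whose left element carries the result and whose right element carries the "not vectorized" reason. Below is a minimal, self-contained sketch of that convention; the class and helper are illustrative stand-ins, and only ImmutablePair from org.apache.commons.lang3 is a real API here.

import org.apache.commons.lang3.tuple.ImmutablePair;

public class PairConventionSketch {

  // Hypothetical stand-in for a vectorized descriptor built during planning.
  static class FakeVectorDesc {
    final String name;
    FakeVectorDesc(String name) { this.name = name; }
  }

  // On success: (desc, null). On failure: (null, humanReadableReason).
  static ImmutablePair<FakeVectorDesc, String> tryBuild(String aggregateName) {
    if (!"sum".equals(aggregateName)) {
      return new ImmutablePair<FakeVectorDesc, String>(
          null, "Aggregation \"" + aggregateName + "\" not supported");
    }
    return new ImmutablePair<FakeVectorDesc, String>(new FakeVectorDesc(aggregateName), null);
  }

  public static void main(String[] args) {
    ImmutablePair<FakeVectorDesc, String> pair = tryBuild("avg");
    if (pair.left == null) {
      // The caller records the reason and falls back to the row-mode operator.
      System.out.println("Not vectorized: " + pair.right);
    }
  }
}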
Use of org.apache.hadoop.hive.ql.exec.vector.VectorAggregationDesc in project hive by apache.
From the class Vectorizer, method validateAndVectorizeOperator:
public Operator<? extends OperatorDesc> validateAndVectorizeOperator(Operator<? extends OperatorDesc> op,
    VectorizationContext vContext, boolean isReduce, boolean isTezOrSpark,
    VectorTaskColumnInfo vectorTaskColumnInfo)
    throws HiveException, VectorizerCannotVectorizeException {

  Operator<? extends OperatorDesc> vectorOp = null;

  // This "global" allows various validation methods to set the "not vectorized" reason.
  currentOperator = op;

  boolean isNative;
  try {
    switch (op.getType()) {
      case MAPJOIN:
        {
          if (op instanceof MapJoinOperator) {
            if (!validateMapJoinOperator((MapJoinOperator) op)) {
              throw new VectorizerCannotVectorizeException();
            }
          } else if (op instanceof SMBMapJoinOperator) {
            if (!validateSMBMapJoinOperator((SMBMapJoinOperator) op)) {
              throw new VectorizerCannotVectorizeException();
            }
          } else {
            setOperatorNotSupported(op);
            throw new VectorizerCannotVectorizeException();
          }

          if (op instanceof MapJoinOperator) {
            MapJoinDesc desc = (MapJoinDesc) op.getConf();
            VectorMapJoinDesc vectorMapJoinDesc = new VectorMapJoinDesc();

            boolean specialize = canSpecializeMapJoin(op, desc, isTezOrSpark, vContext, vectorMapJoinDesc);
            if (!specialize) {
              Class<? extends Operator<?>> opClass = null;

              // *NON-NATIVE* vector map differences for LEFT OUTER JOIN and Filtered...
              List<ExprNodeDesc> bigTableFilters = desc.getFilters().get((byte) desc.getPosBigTable());
              boolean isOuterAndFiltered = (!desc.isNoOuterJoin() && bigTableFilters.size() > 0);
              if (!isOuterAndFiltered) {
                opClass = VectorMapJoinOperator.class;
              } else {
                opClass = VectorMapJoinOuterFilteredOperator.class;
              }
              vectorOp = OperatorFactory.getVectorOperator(
                  opClass, op.getCompilationOpContext(), desc, vContext, vectorMapJoinDesc);
              isNative = false;
            } else {
              // TEMPORARY Until Native Vector Map Join with Hybrid passes tests...
              // HiveConf.setBoolVar(physicalContext.getConf(),
              //     HiveConf.ConfVars.HIVEUSEHYBRIDGRACEHASHJOIN, false);
              vectorOp = specializeMapJoinOperator(op, vContext, desc, vectorMapJoinDesc);
              isNative = true;

              if (vectorTaskColumnInfo != null) {
                VectorMapJoinInfo vectorMapJoinInfo = vectorMapJoinDesc.getVectorMapJoinInfo();
                if (usesVectorUDFAdaptor(vectorMapJoinDesc.getAllBigTableKeyExpressions())) {
                  vectorTaskColumnInfo.setUsesVectorUDFAdaptor(true);
                }
                if (usesVectorUDFAdaptor(vectorMapJoinDesc.getAllBigTableValueExpressions())) {
                  vectorTaskColumnInfo.setUsesVectorUDFAdaptor(true);
                }
              }
            }
          } else {
            Preconditions.checkState(op instanceof SMBMapJoinOperator);

            SMBJoinDesc smbJoinSinkDesc = (SMBJoinDesc) op.getConf();
            VectorSMBJoinDesc vectorSMBJoinDesc = new VectorSMBJoinDesc();

            vectorOp = OperatorFactory.getVectorOperator(
                op.getCompilationOpContext(), smbJoinSinkDesc, vContext, vectorSMBJoinDesc);
            isNative = false;
          }
        }
        break;
      case REDUCESINK:
        {
          if (!validateReduceSinkOperator((ReduceSinkOperator) op)) {
            throw new VectorizerCannotVectorizeException();
          }

          ReduceSinkDesc reduceDesc = (ReduceSinkDesc) op.getConf();
          VectorReduceSinkDesc vectorReduceSinkDesc = new VectorReduceSinkDesc();

          boolean specialize = canSpecializeReduceSink(reduceDesc, isTezOrSpark, vContext, vectorReduceSinkDesc);
          if (!specialize) {
            vectorOp = OperatorFactory.getVectorOperator(
                op.getCompilationOpContext(), reduceDesc, vContext, vectorReduceSinkDesc);
            isNative = false;
          } else {
            vectorOp = specializeReduceSinkOperator(op, vContext, reduceDesc, vectorReduceSinkDesc);
            isNative = true;

            if (vectorTaskColumnInfo != null) {
              VectorReduceSinkInfo vectorReduceSinkInfo = vectorReduceSinkDesc.getVectorReduceSinkInfo();
              if (usesVectorUDFAdaptor(vectorReduceSinkInfo.getReduceSinkKeyExpressions())) {
                vectorTaskColumnInfo.setUsesVectorUDFAdaptor(true);
              }
              if (usesVectorUDFAdaptor(vectorReduceSinkInfo.getReduceSinkValueExpressions())) {
                vectorTaskColumnInfo.setUsesVectorUDFAdaptor(true);
              }
            }
          }
        }
        break;
      case FILTER:
        {
          if (!validateFilterOperator((FilterOperator) op)) {
            throw new VectorizerCannotVectorizeException();
          }

          VectorFilterDesc vectorFilterDesc = new VectorFilterDesc();
          vectorOp = vectorizeFilterOperator(op, vContext, vectorFilterDesc);
          isNative = true;

          if (vectorTaskColumnInfo != null) {
            VectorExpression vectorPredicateExpr = vectorFilterDesc.getPredicateExpression();
            if (usesVectorUDFAdaptor(vectorPredicateExpr)) {
              vectorTaskColumnInfo.setUsesVectorUDFAdaptor(true);
            }
          }
        }
        break;
      case SELECT:
        {
          if (!validateSelectOperator((SelectOperator) op)) {
            throw new VectorizerCannotVectorizeException();
          }

          VectorSelectDesc vectorSelectDesc = new VectorSelectDesc();
          vectorOp = vectorizeSelectOperator(op, vContext, vectorSelectDesc);
          isNative = true;

          if (vectorTaskColumnInfo != null) {
            VectorExpression[] vectorSelectExprs = vectorSelectDesc.getSelectExpressions();
            if (usesVectorUDFAdaptor(vectorSelectExprs)) {
              vectorTaskColumnInfo.setUsesVectorUDFAdaptor(true);
            }
          }
        }
        break;
      case GROUPBY:
        {
          // The validateGroupByOperator method will update vectorGroupByDesc.
          VectorGroupByDesc vectorGroupByDesc = new VectorGroupByDesc();
          if (!validateGroupByOperator((GroupByOperator) op, isReduce, isTezOrSpark, vectorGroupByDesc)) {
            throw new VectorizerCannotVectorizeException();
          }

          ImmutablePair<Operator<? extends OperatorDesc>, String> pair =
              doVectorizeGroupByOperator(op, vContext, vectorGroupByDesc);
          if (pair.left == null) {
            setOperatorIssue(pair.right);
            throw new VectorizerCannotVectorizeException();
          }
          vectorOp = pair.left;
          isNative = false;

          if (vectorTaskColumnInfo != null) {
            VectorExpression[] vecKeyExpressions = vectorGroupByDesc.getKeyExpressions();
            if (usesVectorUDFAdaptor(vecKeyExpressions)) {
              vectorTaskColumnInfo.setUsesVectorUDFAdaptor(true);
            }
            VectorAggregationDesc[] vecAggrDescs = vectorGroupByDesc.getVecAggrDescs();
            for (VectorAggregationDesc vecAggrDesc : vecAggrDescs) {
              if (usesVectorUDFAdaptor(vecAggrDesc.getInputExpression())) {
                vectorTaskColumnInfo.setUsesVectorUDFAdaptor(true);
              }
            }
          }
        }
        break;
      case FILESINK:
        {
          if (!validateFileSinkOperator((FileSinkOperator) op)) {
            throw new VectorizerCannotVectorizeException();
          }

          FileSinkDesc fileSinkDesc = (FileSinkDesc) op.getConf();
          VectorFileSinkDesc vectorFileSinkDesc = new VectorFileSinkDesc();

          vectorOp = OperatorFactory.getVectorOperator(
              op.getCompilationOpContext(), fileSinkDesc, vContext, vectorFileSinkDesc);
          isNative = false;
        }
        break;
      case LIMIT:
        {
          // No validation.
          LimitDesc limitDesc = (LimitDesc) op.getConf();
          VectorLimitDesc vectorLimitDesc = new VectorLimitDesc();

          vectorOp = OperatorFactory.getVectorOperator(
              op.getCompilationOpContext(), limitDesc, vContext, vectorLimitDesc);
          isNative = true;
        }
        break;
      case EVENT:
        {
          // No validation.
          AppMasterEventDesc eventDesc = (AppMasterEventDesc) op.getConf();
          VectorAppMasterEventDesc vectorEventDesc = new VectorAppMasterEventDesc();

          vectorOp = OperatorFactory.getVectorOperator(
              op.getCompilationOpContext(), eventDesc, vContext, vectorEventDesc);
          isNative = true;
        }
        break;
      case PTF:
        {
          // The validatePTFOperator method will update vectorPTFDesc.
          VectorPTFDesc vectorPTFDesc = new VectorPTFDesc();
          if (!validatePTFOperator((PTFOperator) op, vContext, vectorPTFDesc)) {
            throw new VectorizerCannotVectorizeException();
          }

          vectorOp = vectorizePTFOperator(op, vContext, vectorPTFDesc);
          isNative = true;
        }
        break;
      case HASHTABLESINK:
        {
          // No validation.
          SparkHashTableSinkDesc sparkHashTableSinkDesc = (SparkHashTableSinkDesc) op.getConf();
          VectorSparkHashTableSinkDesc vectorSparkHashTableSinkDesc = new VectorSparkHashTableSinkDesc();

          vectorOp = OperatorFactory.getVectorOperator(
              op.getCompilationOpContext(), sparkHashTableSinkDesc, vContext, vectorSparkHashTableSinkDesc);
          isNative = true;
        }
        break;
      case SPARKPRUNINGSINK:
        {
          // No validation.
          SparkPartitionPruningSinkDesc sparkPartitionPruningSinkDesc = (SparkPartitionPruningSinkDesc) op.getConf();
          VectorSparkPartitionPruningSinkDesc vectorSparkPartitionPruningSinkDesc =
              new VectorSparkPartitionPruningSinkDesc();

          vectorOp = OperatorFactory.getVectorOperator(
              op.getCompilationOpContext(), sparkPartitionPruningSinkDesc, vContext,
              vectorSparkPartitionPruningSinkDesc);

          // Need to maintain the unique ID so that target map works can read the output.
          ((SparkPartitionPruningSinkOperator) vectorOp).setUniqueId(
              ((SparkPartitionPruningSinkOperator) op).getUniqueId());
          isNative = true;
        }
        break;
      default:
        setOperatorNotSupported(op);
        throw new VectorizerCannotVectorizeException();
    }
  } catch (HiveException e) {
    setOperatorIssue(e.getMessage());
    throw new VectorizerCannotVectorizeException();
  }

  Preconditions.checkState(vectorOp != null);
  if (vectorTaskColumnInfo != null && !isNative) {
    vectorTaskColumnInfo.setAllNative(false);
  }

  LOG.debug("vectorizeOperator " + vectorOp.getClass().getName());
  LOG.debug("vectorizeOperator " + vectorOp.getConf().getClass().getName());

  return vectorOp;
}
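Every case in the switch follows the same pattern: validate the row-mode operator, build a Vector*Desc, instantiate the vector operator, then record whether the result is native and whether any of its expressions needs the VectorUDFAdaptor. The sketch below compresses that bookkeeping into a few lines; all types and helpers here are hypothetical stand-ins rather than Hive classes.

import java.util.List;

public class DispatchBookkeepingSketch {

  // Hypothetical stand-in for VectorTaskColumnInfo's two flags.
  static class TaskColumnInfo {
    boolean allNative = true;
    boolean usesAdaptor = false;
  }

  interface VectorizedExpr {
    boolean needsAdaptor();
  }

  // Mirrors the per-operator bookkeeping above: any non-native operator clears
  // "allNative", and any expression that needs the row-mode adaptor sets "usesAdaptor".
  static void record(TaskColumnInfo info, boolean isNative, List<VectorizedExpr> exprs) {
    if (!isNative) {
      info.allNative = false;
    }
    for (VectorizedExpr expr : exprs) {
      if (expr.needsAdaptor()) {
        info.usesAdaptor = true;
      }
    }
  }

  public static void main(String[] args) {
    TaskColumnInfo info = new TaskColumnInfo();
    record(info, /* isNative */ false, List.of((VectorizedExpr) () -> true));
    System.out.println("allNative=" + info.allNative + ", usesAdaptor=" + info.usesAdaptor);
  }
}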
Use of org.apache.hadoop.hive.ql.exec.vector.VectorAggregationDesc in project hive by apache.
From the class Vectorizer, method getVectorAggregationDesc:
private static ImmutablePair<VectorAggregationDesc, String> getVectorAggregationDesc(
    AggregationDesc aggrDesc, VectorizationContext vContext) throws HiveException {

  String aggregateName = aggrDesc.getGenericUDAFName();
  ArrayList<ExprNodeDesc> parameterList = aggrDesc.getParameters();
  final int parameterCount = parameterList.size();
  final GenericUDAFEvaluator.Mode udafEvaluatorMode = aggrDesc.getMode();

  /*
   * Look at evaluator to get output type info.
   */
  GenericUDAFEvaluator evaluator = aggrDesc.getGenericUDAFEvaluator();

  ArrayList<ExprNodeDesc> parameters = aggrDesc.getParameters();
  ObjectInspector[] parameterObjectInspectors = new ObjectInspector[parameterCount];
  for (int i = 0; i < parameterCount; i++) {
    TypeInfo typeInfo = parameters.get(i).getTypeInfo();
    parameterObjectInspectors[i] =
        TypeInfoUtils.getStandardWritableObjectInspectorFromTypeInfo(typeInfo);
  }

  // The only way to get the return object inspector (and its return type) is to
  // initialize it...
  ObjectInspector returnOI = evaluator.init(aggrDesc.getMode(), parameterObjectInspectors);

  VectorizedUDAFs annotation =
      AnnotationUtils.getAnnotation(evaluator.getClass(), VectorizedUDAFs.class);
  if (annotation == null) {
    String issue =
        "Evaluator " + evaluator.getClass().getSimpleName() + " does not have a " +
        "vectorized UDAF annotation (aggregation: \"" + aggregateName + "\"). " +
        "Vectorization not supported";
    return new ImmutablePair<VectorAggregationDesc, String>(null, issue);
  }
  final Class<? extends VectorAggregateExpression>[] vecAggrClasses = annotation.value();

  final TypeInfo outputTypeInfo = TypeInfoUtils.getTypeInfoFromTypeString(returnOI.getTypeName());

  // Not final since it may change later due to DECIMAL_64.
  ColumnVector.Type outputColVectorType =
      VectorizationContext.getColumnVectorTypeFromTypeInfo(outputTypeInfo);

  /*
   * Determine input type info.
   */
  final TypeInfo inputTypeInfo;

  // Not final since it may change later due to DECIMAL_64.
  VectorExpression inputExpression;
  ColumnVector.Type inputColVectorType;

  if (parameterCount == 0) {
    // COUNT(*)
    inputTypeInfo = null;
    inputColVectorType = null;
    inputExpression = null;
  } else if (parameterCount == 1) {
    ExprNodeDesc exprNodeDesc = parameterList.get(0);
    inputTypeInfo = exprNodeDesc.getTypeInfo();
    if (inputTypeInfo == null) {
      String issue =
          "Aggregations with null parameter type not supported " +
          aggregateName + "(" + parameterList.toString() + ")";
      return new ImmutablePair<VectorAggregationDesc, String>(null, issue);
    }

    /*
     * Determine an *initial* input vector expression.
     *
     * Note: we may have to convert it later from DECIMAL_64 to regular decimal.
     */
    inputExpression =
        vContext.getVectorExpression(exprNodeDesc, VectorExpressionDescriptor.Mode.PROJECTION);
    if (inputExpression == null) {
      String issue =
          "Parameter expression " + exprNodeDesc.toString() + " not supported " +
          aggregateName + "(" + parameterList.toString() + ")";
      return new ImmutablePair<VectorAggregationDesc, String>(null, issue);
    }
    if (inputExpression.getOutputTypeInfo() == null) {
      String issue =
          "Parameter expression " + exprNodeDesc.toString() + " with null type not supported " +
          aggregateName + "(" + parameterList.toString() + ")";
      return new ImmutablePair<VectorAggregationDesc, String>(null, issue);
    }
    inputColVectorType = inputExpression.getOutputColumnVectorType();
  } else {
    // No multi-parameter aggregations supported.
    String issue =
        "Aggregations with > 1 parameter are not supported " +
        aggregateName + "(" + parameterList.toString() + ")";
    return new ImmutablePair<VectorAggregationDesc, String>(null, issue);
  }

  /*
   * When we have DECIMAL_64 as the input parameter then we have to see if there is a special
   * vector UDAF for it. If not we will need to convert the input parameter.
   */
  if (inputTypeInfo != null && inputColVectorType == ColumnVector.Type.DECIMAL_64) {
    if (outputColVectorType == ColumnVector.Type.DECIMAL) {
      DecimalTypeInfo outputDecimalTypeInfo = (DecimalTypeInfo) outputTypeInfo;
      if (HiveDecimalWritable.isPrecisionDecimal64(outputDecimalTypeInfo.getPrecision())) {
        // Try with DECIMAL_64 input and DECIMAL_64 output.
        final Class<? extends VectorAggregateExpression> vecAggrClass =
            findVecAggrClass(
                vecAggrClasses, aggregateName, inputColVectorType,
                ColumnVector.Type.DECIMAL_64, udafEvaluatorMode);
        if (vecAggrClass != null) {
          final VectorAggregationDesc vecAggrDesc =
              new VectorAggregationDesc(
                  aggrDesc, evaluator, inputTypeInfo, inputColVectorType, inputExpression,
                  outputTypeInfo, ColumnVector.Type.DECIMAL_64, vecAggrClass);
          return new ImmutablePair<VectorAggregationDesc, String>(vecAggrDesc, null);
        }
      }

      // Try with regular DECIMAL output type.
      final Class<? extends VectorAggregateExpression> vecAggrClass =
          findVecAggrClass(
              vecAggrClasses, aggregateName, inputColVectorType, outputColVectorType,
              udafEvaluatorMode);
      if (vecAggrClass != null) {
        final VectorAggregationDesc vecAggrDesc =
            new VectorAggregationDesc(
                aggrDesc, evaluator, inputTypeInfo, inputColVectorType, inputExpression,
                outputTypeInfo, outputColVectorType, vecAggrClass);
        return new ImmutablePair<VectorAggregationDesc, String>(vecAggrDesc, null);
      }

      // No support for DECIMAL_64 input. We must convert.
      inputExpression = vContext.wrapWithDecimal64ToDecimalConversion(inputExpression);
      inputColVectorType = ColumnVector.Type.DECIMAL;

      // Fall through...
    } else {
      // Try with DECIMAL_64 input and desired output type.
      final Class<? extends VectorAggregateExpression> vecAggrClass =
          findVecAggrClass(
              vecAggrClasses, aggregateName, inputColVectorType, outputColVectorType,
              udafEvaluatorMode);
      if (vecAggrClass != null) {
        final VectorAggregationDesc vecAggrDesc =
            new VectorAggregationDesc(
                aggrDesc, evaluator, inputTypeInfo, inputColVectorType, inputExpression,
                outputTypeInfo, outputColVectorType, vecAggrClass);
        return new ImmutablePair<VectorAggregationDesc, String>(vecAggrDesc, null);
      }

      // No support for DECIMAL_64 input. We must convert.
      inputExpression = vContext.wrapWithDecimal64ToDecimalConversion(inputExpression);
      inputColVectorType = ColumnVector.Type.DECIMAL;

      // Fall through...
    }
  }

  /*
   * Look for normal match.
   */
  Class<? extends VectorAggregateExpression> vecAggrClass =
      findVecAggrClass(
          vecAggrClasses, aggregateName, inputColVectorType, outputColVectorType,
          udafEvaluatorMode);
  if (vecAggrClass != null) {
    final VectorAggregationDesc vecAggrDesc =
        new VectorAggregationDesc(
            aggrDesc, evaluator, inputTypeInfo, inputColVectorType, inputExpression,
            outputTypeInfo, outputColVectorType, vecAggrClass);
    return new ImmutablePair<VectorAggregationDesc, String>(vecAggrDesc, null);
  }

  // No match?
  String issue =
      "Vector aggregation : \"" + aggregateName + "\" " +
      "for input type: " + (inputColVectorType == null ? "any" : "\"" + inputColVectorType + "\"") + " " +
      "and output type: \"" + outputColVectorType + "\" " +
      "and mode: " + udafEvaluatorMode + " not supported for " +
      "evaluator " + evaluator.getClass().getSimpleName();
  return new ImmutablePair<VectorAggregationDesc, String>(null, issue);
}
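The DECIMAL_64 handling above boils down to a short decision ladder: prefer a UDAF class that consumes DECIMAL_64 directly (ideally producing DECIMAL_64 too, when the output precision fits), and only wrap the input in a conversion to regular DECIMAL when no such class is registered. Below is a compressed, self-contained sketch of that ladder; the enum, lookup function, and registered class name are hypothetical, not Hive APIs.

import java.util.Optional;

public class Decimal64FallbackSketch {

  enum VecType { LONG, DECIMAL, DECIMAL_64 }

  // Hypothetical stand-in for the annotation-driven findVecAggrClass lookup:
  // pretend only "sum(DECIMAL_64) -> DECIMAL" is registered.
  static Optional<String> findUdaf(String name, VecType in, VecType out) {
    return ("sum".equals(name) && in == VecType.DECIMAL_64 && out == VecType.DECIMAL)
        ? Optional.of("SumDecimal64ToDecimal") : Optional.empty();
  }

  static String chooseUdaf(String name, VecType inputType, VecType outputType) {
    if (inputType == VecType.DECIMAL_64) {
      // 1. Try to stay in DECIMAL_64 end to end (only possible for small output precisions).
      Optional<String> match = findUdaf(name, VecType.DECIMAL_64, VecType.DECIMAL_64);
      if (match.isPresent()) {
        return match.get();
      }
      // 2. Try DECIMAL_64 input with the evaluator's declared output type.
      match = findUdaf(name, VecType.DECIMAL_64, outputType);
      if (match.isPresent()) {
        return match.get();
      }
      // 3. No DECIMAL_64-aware class: convert the input and fall through
      //    (wrapWithDecimal64ToDecimalConversion(...) in the real code).
      inputType = VecType.DECIMAL;
    }
    // 4. Normal match on the (possibly converted) input type.
    return findUdaf(name, inputType, outputType).orElse("not vectorized");
  }

  public static void main(String[] args) {
    System.out.println(chooseUdaf("sum", VecType.DECIMAL_64, VecType.DECIMAL));
    System.out.println(chooseUdaf("avg", VecType.DECIMAL_64, VecType.DECIMAL));
  }
}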
Use of org.apache.hadoop.hive.ql.exec.vector.VectorAggregationDesc in project hive by apache.
From the class TestVectorizer, method testAggregateOnUDF:
@Test
public void testAggregateOnUDF() throws HiveException, VectorizerCannotVectorizeException {
  ExprNodeColumnDesc colExprA = new ExprNodeColumnDesc(Integer.class, "col1", "T", false);
  ExprNodeColumnDesc colExprB = new ExprNodeColumnDesc(Integer.class, "col2", "T", false);
  List<ExprNodeDesc> children = new ArrayList<ExprNodeDesc>();
  children.add(colExprA);
  ExprNodeGenericFuncDesc exprNodeDesc =
      new ExprNodeGenericFuncDesc(TypeInfoFactory.intTypeInfo, new GenericUDFAbs(), children);

  ArrayList<ExprNodeDesc> params = new ArrayList<ExprNodeDesc>();
  params.add(exprNodeDesc);

  List<ObjectInspector> paramOIs = new ArrayList<ObjectInspector>();
  paramOIs.add(exprNodeDesc.getWritableObjectInspector());

  AggregationDesc aggDesc = new AggregationDesc(
      "sum", FunctionRegistry.getGenericUDAFEvaluator("sum", paramOIs, false, false),
      params, false, GenericUDAFEvaluator.Mode.PARTIAL1);

  ArrayList<String> outputColumnNames = new ArrayList<String>();
  outputColumnNames.add("_col0");

  GroupByDesc desc = new GroupByDesc();
  VectorGroupByDesc vectorDesc = new VectorGroupByDesc();
  vectorDesc.setProcessingMode(ProcessingMode.HASH);
  vectorDesc.setVecAggrDescs(
      new VectorAggregationDesc[] {
          new VectorAggregationDesc(
              aggDesc, new GenericUDAFSum.GenericUDAFSumLong(), TypeInfoFactory.longTypeInfo,
              ColumnVector.Type.LONG, null, TypeInfoFactory.longTypeInfo, ColumnVector.Type.LONG,
              VectorUDAFCountStar.class) });
  desc.setOutputColumnNames(outputColumnNames);

  ArrayList<AggregationDesc> aggDescList = new ArrayList<AggregationDesc>();
  aggDescList.add(aggDesc);
  desc.setAggregators(aggDescList);

  ArrayList<ExprNodeDesc> grpByKeys = new ArrayList<ExprNodeDesc>();
  grpByKeys.add(colExprB);
  desc.setKeys(grpByKeys);

  Operator<? extends OperatorDesc> gbyOp = OperatorFactory.get(new CompilationOpContext(), desc);
  desc.setMode(GroupByDesc.Mode.HASH);

  VectorizationContext ctx =
      new VectorizationContext("name", Arrays.asList(new String[] { "col1", "col2" }));

  Vectorizer v = new Vectorizer();
  v.testSetCurrentBaseWork(new MapWork());
  VectorGroupByOperator vectorOp =
      (VectorGroupByOperator) Vectorizer.vectorizeGroupByOperator(gbyOp, ctx, vectorDesc);

  Assert.assertEquals(VectorUDAFSumLong.class, vectorDesc.getVecAggrDescs()[0].getVecAggrClass());
}
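The closing assertion is the interesting part of this test: the descriptor is deliberately seeded with VectorUDAFCountStar.class, yet after vectorization it reports VectorUDAFSumLong, which suggests the Vectorizer rebuilds the VectorAggregationDesc array from the sum evaluator and its long column types (as doVectorizeGroupByOperator does above) rather than trusting the pre-populated value.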