Use of org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.VectorAggregateExpression in project hive by apache.
The class VectorizationContext, method getAggregatorExpression:
public VectorAggregateExpression getAggregatorExpression(AggregationDesc desc) throws HiveException {
  ArrayList<ExprNodeDesc> paramDescList = desc.getParameters();
  VectorExpression[] vectorParams = new VectorExpression[paramDescList.size()];
  for (int i = 0; i < paramDescList.size(); ++i) {
    ExprNodeDesc exprDesc = paramDescList.get(i);
    vectorParams[i] = this.getVectorExpression(exprDesc, VectorExpressionDescriptor.Mode.PROJECTION);
  }
  String aggregateName = desc.getGenericUDAFName();
  VectorExpressionDescriptor.ArgumentType inputType = VectorExpressionDescriptor.ArgumentType.NONE;
  if (paramDescList.size() > 0) {
    ExprNodeDesc inputExpr = paramDescList.get(0);
    inputType = VectorExpressionDescriptor.ArgumentType.fromHiveTypeName(inputExpr.getTypeString());
    if (inputType == VectorExpressionDescriptor.ArgumentType.NONE) {
      throw new HiveException("No vector argument type for Hive type name " + inputExpr.getTypeString());
    }
  }
  GenericUDAFEvaluator.Mode udafEvaluatorMode = desc.getMode();
  for (AggregateDefinition aggDef : aggregatesDefinition) {
    if (aggregateName.equalsIgnoreCase(aggDef.getName()) &&
        ((aggDef.getType() == VectorExpressionDescriptor.ArgumentType.NONE &&
          inputType == VectorExpressionDescriptor.ArgumentType.NONE) ||
         (aggDef.getType().isSameTypeOrFamily(inputType)))) {
      // A null evaluator mode in the definition means all modes are ok.
      GenericUDAFEvaluator.Mode aggDefUdafEvaluatorMode = aggDef.getUdafEvaluatorMode();
      if (aggDefUdafEvaluatorMode != null && aggDefUdafEvaluatorMode != udafEvaluatorMode) {
        continue;
      }
      Class<? extends VectorAggregateExpression> aggClass = aggDef.getAggClass();
      try {
        Constructor<? extends VectorAggregateExpression> ctor = aggClass.getConstructor(VectorExpression.class);
        VectorAggregateExpression aggExpr = ctor.newInstance(vectorParams.length > 0 ? vectorParams[0] : null);
        aggExpr.init(desc);
        return aggExpr;
      } catch (Exception e) {
        throw new HiveException("Internal exception for vector aggregate : \"" + aggregateName +
            "\" for type: \"" + inputType + "\"", e);
      }
    }
  }
  throw new HiveException("Vector aggregate not implemented: \"" + aggregateName +
      "\" for type: \"" + inputType.name() + "\" (UDAF evaluator mode = " +
      (udafEvaluatorMode == null ? "NULL" : udafEvaluatorMode.name()) + ")");
}
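getAggregatorExpression is a table-driven factory: each AggregateDefinition row pairs a UDAF name, an argument-type family, and an optional evaluator mode with a VectorAggregateExpression subclass, and the first matching row is instantiated reflectively through its VectorExpression constructor. Below is a minimal, self-contained sketch of the same pattern; the Aggregate, SumLong, CountAny, and Definition classes are hypothetical stand-ins, not Hive classes.

import java.lang.reflect.Constructor;
import java.util.Arrays;
import java.util.List;

public class AggregateFactorySketch {

  // Stand-in for Hive's VectorAggregateExpression hierarchy (hypothetical).
  public abstract static class Aggregate {
    protected final String inputColumn;
    protected Aggregate(String inputColumn) { this.inputColumn = inputColumn; }
    public abstract String describe();
  }

  public static class SumLong extends Aggregate {
    public SumLong(String inputColumn) { super(inputColumn); }
    @Override public String describe() { return "sum(" + inputColumn + ")"; }
  }

  public static class CountAny extends Aggregate {
    public CountAny(String inputColumn) { super(inputColumn); }
    @Override public String describe() { return "count(" + inputColumn + ")"; }
  }

  // One row of the lookup table: UDAF name + accepted type + implementation
  // class, playing the role of Hive's AggregateDefinition.
  private static class Definition {
    final String name;
    final String argType; // "any" accepts every input type
    final Class<? extends Aggregate> implClass;
    Definition(String name, String argType, Class<? extends Aggregate> implClass) {
      this.name = name; this.argType = argType; this.implClass = implClass;
    }
  }

  private static final List<Definition> DEFINITIONS = Arrays.asList(
      new Definition("sum", "long", SumLong.class),
      new Definition("count", "any", CountAny.class));

  // Scan the table and reflectively instantiate the first match, mirroring the
  // getConstructor(...).newInstance(...) call in getAggregatorExpression.
  public static Aggregate create(String name, String argType, String inputColumn) throws Exception {
    for (Definition def : DEFINITIONS) {
      if (def.name.equalsIgnoreCase(name)
          && (def.argType.equals("any") || def.argType.equals(argType))) {
        Constructor<? extends Aggregate> ctor = def.implClass.getConstructor(String.class);
        return ctor.newInstance(inputColumn);
      }
    }
    throw new IllegalArgumentException("Aggregate not implemented: " + name + " for type " + argType);
  }

  public static void main(String[] args) throws Exception {
    System.out.println(create("SUM", "long", "col0").describe()); // prints: sum(col0)
  }
}

The reflective constructor call is what lets new vectorized aggregates be registered by adding a table row, with no change to the dispatch logic itself.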
Use of org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.VectorAggregateExpression in project hive by apache.
The class VectorAggregationBufferBatch, method compileAggregationBatchInfo:
public void compileAggregationBatchInfo(VectorAggregateExpression[] aggregators) {
  JavaDataModel model = JavaDataModel.get();
  int[] variableSizeAggregators = new int[aggregators.length];
  int indexVariableSizes = 0;
  aggregatorsFixedSize = JavaDataModel.alignUp(
      model.object() + model.primitive1() * 2 + model.ref(), model.memoryAlign());
  aggregatorsFixedSize += model.lengthForObjectArrayOfSize(aggregators.length);
  for (int i = 0; i < aggregators.length; ++i) {
    VectorAggregateExpression aggregator = aggregators[i];
    aggregatorsFixedSize += aggregator.getAggregationBufferFixedSize();
    if (aggregator.hasVariableSize()) {
      variableSizeAggregators[indexVariableSizes] = i;
      ++indexVariableSizes;
    }
  }
  this.variableSizeAggregators = Arrays.copyOfRange(variableSizeAggregators, 0, indexVariableSizes);
}
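compileAggregationBatchInfo precomputes the fixed-size portion of one aggregation-buffer set using JavaDataModel, and separately records the indexes of aggregators whose buffers can grow at runtime. The index collection uses a common idiom: fill a worst-case-sized scratch array, then trim it to the populated prefix with Arrays.copyOfRange. A small runnable sketch of that idiom, with a hypothetical hasVariableSize flag array standing in for the aggregators:

import java.util.Arrays;

public class CopyOfRangeSketch {
  public static void main(String[] args) {
    // Hypothetical per-aggregator flags: true = variable-size buffer.
    boolean[] hasVariableSize = { false, true, false, true, true };

    // Collect matching indexes into a worst-case-sized scratch array,
    // then trim to the populated prefix, as compileAggregationBatchInfo does.
    int[] scratch = new int[hasVariableSize.length];
    int count = 0;
    for (int i = 0; i < hasVariableSize.length; ++i) {
      if (hasVariableSize[i]) {
        scratch[count++] = i;
      }
    }
    int[] variableSizeAggregators = Arrays.copyOfRange(scratch, 0, count);
    System.out.println(Arrays.toString(variableSizeAggregators)); // [1, 3, 4]
  }
}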
Use of org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.VectorAggregateExpression in project hive by apache.
The class Vectorizer, method vectorizeOperator:
public Operator<? extends OperatorDesc> vectorizeOperator(Operator<? extends OperatorDesc> op,
    VectorizationContext vContext, boolean isTezOrSpark, VectorTaskColumnInfo vectorTaskColumnInfo)
    throws HiveException {
  Operator<? extends OperatorDesc> vectorOp = null;
  boolean isNative;
  switch (op.getType()) {
    case TABLESCAN:
      vectorOp = vectorizeTableScanOperator(op, vContext);
      isNative = true;
      break;
    case MAPJOIN:
      {
        if (op instanceof MapJoinOperator) {
          VectorMapJoinInfo vectorMapJoinInfo = new VectorMapJoinInfo();
          MapJoinDesc desc = (MapJoinDesc) op.getConf();
          boolean specialize = canSpecializeMapJoin(op, desc, isTezOrSpark, vContext, vectorMapJoinInfo);
          if (!specialize) {
            Class<? extends Operator<?>> opClass = null;
            // *NON-NATIVE* vector map differences for LEFT OUTER JOIN and Filtered...
            List<ExprNodeDesc> bigTableFilters = desc.getFilters().get((byte) desc.getPosBigTable());
            boolean isOuterAndFiltered = (!desc.isNoOuterJoin() && bigTableFilters.size() > 0);
            if (!isOuterAndFiltered) {
              opClass = VectorMapJoinOperator.class;
            } else {
              opClass = VectorMapJoinOuterFilteredOperator.class;
            }
            vectorOp = OperatorFactory.getVectorOperator(opClass, op.getCompilationOpContext(), op.getConf(), vContext);
            isNative = false;
          } else {
            // TEMPORARY Until Native Vector Map Join with Hybrid passes tests...
            // HiveConf.setBoolVar(physicalContext.getConf(),
            //     HiveConf.ConfVars.HIVEUSEHYBRIDGRACEHASHJOIN, false);
            vectorOp = specializeMapJoinOperator(op, vContext, desc, vectorMapJoinInfo);
            isNative = true;
            if (vectorTaskColumnInfo != null) {
              if (usesVectorUDFAdaptor(vectorMapJoinInfo.getBigTableKeyExpressions())) {
                vectorTaskColumnInfo.setUsesVectorUDFAdaptor(true);
              }
              if (usesVectorUDFAdaptor(vectorMapJoinInfo.getBigTableValueExpressions())) {
                vectorTaskColumnInfo.setUsesVectorUDFAdaptor(true);
              }
            }
          }
        } else {
          Preconditions.checkState(op instanceof SMBMapJoinOperator);
          SMBJoinDesc smbJoinSinkDesc = (SMBJoinDesc) op.getConf();
          VectorSMBJoinDesc vectorSMBJoinDesc = new VectorSMBJoinDesc();
          smbJoinSinkDesc.setVectorDesc(vectorSMBJoinDesc);
          vectorOp = OperatorFactory.getVectorOperator(op.getCompilationOpContext(), smbJoinSinkDesc, vContext);
          isNative = false;
        }
      }
      break;
    case REDUCESINK:
      {
        VectorReduceSinkInfo vectorReduceSinkInfo = new VectorReduceSinkInfo();
        ReduceSinkDesc desc = (ReduceSinkDesc) op.getConf();
        boolean specialize = canSpecializeReduceSink(desc, isTezOrSpark, vContext, vectorReduceSinkInfo);
        if (!specialize) {
          vectorOp = OperatorFactory.getVectorOperator(op.getCompilationOpContext(), op.getConf(), vContext);
          isNative = false;
        } else {
          vectorOp = specializeReduceSinkOperator(op, vContext, desc, vectorReduceSinkInfo);
          isNative = true;
          if (vectorTaskColumnInfo != null) {
            if (usesVectorUDFAdaptor(vectorReduceSinkInfo.getReduceSinkKeyExpressions())) {
              vectorTaskColumnInfo.setUsesVectorUDFAdaptor(true);
            }
            if (usesVectorUDFAdaptor(vectorReduceSinkInfo.getReduceSinkValueExpressions())) {
              vectorTaskColumnInfo.setUsesVectorUDFAdaptor(true);
            }
          }
        }
      }
      break;
    case FILTER:
      {
        vectorOp = vectorizeFilterOperator(op, vContext);
        isNative = true;
        if (vectorTaskColumnInfo != null) {
          VectorFilterDesc vectorFilterDesc = (VectorFilterDesc) ((AbstractOperatorDesc) vectorOp.getConf()).getVectorDesc();
          VectorExpression vectorPredicateExpr = vectorFilterDesc.getPredicateExpression();
          if (usesVectorUDFAdaptor(vectorPredicateExpr)) {
            vectorTaskColumnInfo.setUsesVectorUDFAdaptor(true);
          }
        }
      }
      break;
    case SELECT:
      {
        vectorOp = vectorizeSelectOperator(op, vContext);
        isNative = true;
        if (vectorTaskColumnInfo != null) {
          VectorSelectDesc vectorSelectDesc = (VectorSelectDesc) ((AbstractOperatorDesc) vectorOp.getConf()).getVectorDesc();
          VectorExpression[] vectorSelectExprs = vectorSelectDesc.getSelectExpressions();
          if (usesVectorUDFAdaptor(vectorSelectExprs)) {
            vectorTaskColumnInfo.setUsesVectorUDFAdaptor(true);
          }
        }
      }
      break;
    case GROUPBY:
      {
        vectorOp = vectorizeGroupByOperator(op, vContext);
        isNative = false;
        if (vectorTaskColumnInfo != null) {
          VectorGroupByDesc vectorGroupByDesc = (VectorGroupByDesc) ((AbstractOperatorDesc) vectorOp.getConf()).getVectorDesc();
          if (!vectorGroupByDesc.isVectorOutput()) {
            vectorTaskColumnInfo.setGroupByVectorOutput(false);
          }
          VectorExpression[] vecKeyExpressions = vectorGroupByDesc.getKeyExpressions();
          if (usesVectorUDFAdaptor(vecKeyExpressions)) {
            vectorTaskColumnInfo.setUsesVectorUDFAdaptor(true);
          }
          VectorAggregateExpression[] vecAggregators = vectorGroupByDesc.getAggregators();
          for (VectorAggregateExpression vecAggr : vecAggregators) {
            if (usesVectorUDFAdaptor(vecAggr.inputExpression())) {
              vectorTaskColumnInfo.setUsesVectorUDFAdaptor(true);
            }
          }
        }
      }
      break;
    case FILESINK:
      {
        FileSinkDesc fileSinkDesc = (FileSinkDesc) op.getConf();
        VectorFileSinkDesc vectorFileSinkDesc = new VectorFileSinkDesc();
        fileSinkDesc.setVectorDesc(vectorFileSinkDesc);
        vectorOp = OperatorFactory.getVectorOperator(op.getCompilationOpContext(), fileSinkDesc, vContext);
        isNative = false;
      }
      break;
    case LIMIT:
      {
        LimitDesc limitDesc = (LimitDesc) op.getConf();
        VectorLimitDesc vectorLimitDesc = new VectorLimitDesc();
        limitDesc.setVectorDesc(vectorLimitDesc);
        vectorOp = OperatorFactory.getVectorOperator(op.getCompilationOpContext(), limitDesc, vContext);
        isNative = true;
      }
      break;
    case EVENT:
      {
        AppMasterEventDesc eventDesc = (AppMasterEventDesc) op.getConf();
        VectorAppMasterEventDesc vectorEventDesc = new VectorAppMasterEventDesc();
        eventDesc.setVectorDesc(vectorEventDesc);
        vectorOp = OperatorFactory.getVectorOperator(op.getCompilationOpContext(), eventDesc, vContext);
        isNative = true;
      }
      break;
    case HASHTABLESINK:
      {
        SparkHashTableSinkDesc sparkHashTableSinkDesc = (SparkHashTableSinkDesc) op.getConf();
        VectorSparkHashTableSinkDesc vectorSparkHashTableSinkDesc = new VectorSparkHashTableSinkDesc();
        sparkHashTableSinkDesc.setVectorDesc(vectorSparkHashTableSinkDesc);
        vectorOp = OperatorFactory.getVectorOperator(op.getCompilationOpContext(), sparkHashTableSinkDesc, vContext);
        isNative = true;
      }
      break;
    case SPARKPRUNINGSINK:
      {
        SparkPartitionPruningSinkDesc sparkPartitionPruningSinkDesc = (SparkPartitionPruningSinkDesc) op.getConf();
        VectorSparkPartitionPruningSinkDesc vectorSparkPartitionPruningSinkDesc = new VectorSparkPartitionPruningSinkDesc();
        sparkPartitionPruningSinkDesc.setVectorDesc(vectorSparkPartitionPruningSinkDesc);
        vectorOp = OperatorFactory.getVectorOperator(op.getCompilationOpContext(), sparkPartitionPruningSinkDesc, vContext);
        isNative = true;
      }
      break;
    default:
      // These are children of GROUP BY operators with non-vector outputs.
      isNative = false;
      vectorOp = op;
      break;
  }
  Preconditions.checkState(vectorOp != null);
  if (vectorTaskColumnInfo != null && !isNative) {
    vectorTaskColumnInfo.setAllNative(false);
  }
  LOG.debug("vectorizeOperator " + vectorOp.getClass().getName());
  LOG.debug("vectorizeOperator " + vectorOp.getConf().getClass().getName());
  if (vectorOp != op) {
    fixupParentChildOperators(op, vectorOp);
    ((AbstractOperatorDesc) vectorOp.getConf()).setVectorMode(true);
  }
  return vectorOp;
}
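For MAPJOIN and REDUCESINK, vectorizeOperator tries a specialized native operator first and falls back to a generic non-native one, then records whether the whole task stayed native. A compact runnable sketch of that specialize-or-fall-back control flow; Op, NativeJoinOp, GenericJoinOp, and canSpecialize are hypothetical stand-ins for the Hive classes and checks:

public class DispatchSketch {

  interface Op { String name(); }

  static class GenericJoinOp implements Op { public String name() { return "generic join (non-native)"; } }
  static class NativeJoinOp implements Op { public String name() { return "specialized join (native)"; } }

  // Mirrors canSpecializeMapJoin/canSpecializeReduceSink; the check here is
  // arbitrary, whereas Hive inspects the operator descriptor and context.
  static boolean canSpecialize(boolean hasOuterFilter) {
    return !hasOuterFilter;
  }

  static boolean allNative = true;

  static Op vectorize(boolean hasOuterFilter) {
    Op vectorOp;
    boolean isNative;
    if (canSpecialize(hasOuterFilter)) {
      vectorOp = new NativeJoinOp();
      isNative = true;
    } else {
      vectorOp = new GenericJoinOp();
      isNative = false;
    }
    if (!isNative) {
      allNative = false; // mirrors vectorTaskColumnInfo.setAllNative(false)
    }
    return vectorOp;
  }

  public static void main(String[] args) {
    System.out.println(vectorize(false).name()); // specialized join (native)
    System.out.println(vectorize(true).name());  // generic join (non-native)
    System.out.println("allNative = " + allNative); // false
  }
}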
Use of org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.VectorAggregateExpression in project hive by apache.
The class Vectorizer, method vectorizeGroupByOperator:
/*
 * NOTE: The VectorGroupByDesc has already been allocated and partially populated.
 */
public static Operator<? extends OperatorDesc> vectorizeGroupByOperator(
    Operator<? extends OperatorDesc> groupByOp, VectorizationContext vContext) throws HiveException {
  GroupByDesc groupByDesc = (GroupByDesc) groupByOp.getConf();
  List<ExprNodeDesc> keysDesc = groupByDesc.getKeys();
  VectorExpression[] vecKeyExpressions = vContext.getVectorExpressions(keysDesc);
  ArrayList<AggregationDesc> aggrDesc = groupByDesc.getAggregators();
  final int size = aggrDesc.size();
  VectorAggregateExpression[] vecAggregators = new VectorAggregateExpression[size];
  int[] projectedOutputColumns = new int[size];
  for (int i = 0; i < size; ++i) {
    AggregationDesc aggDesc = aggrDesc.get(i);
    vecAggregators[i] = vContext.getAggregatorExpression(aggDesc);
    // GroupBy generates a new vectorized row batch...
    projectedOutputColumns[i] = i;
  }
  VectorGroupByDesc vectorGroupByDesc = (VectorGroupByDesc) groupByDesc.getVectorDesc();
  vectorGroupByDesc.setKeyExpressions(vecKeyExpressions);
  vectorGroupByDesc.setAggregators(vecAggregators);
  vectorGroupByDesc.setProjectedOutputColumns(projectedOutputColumns);
  return OperatorFactory.getVectorOperator(groupByOp.getCompilationOpContext(), groupByDesc, vContext);
}
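Because the group-by operator emits a brand-new row batch, the projection it sets up is the identity mapping: aggregate i is written to output column i. A tiny runnable sketch of that mapping, with a hypothetical aggregate count in place of groupByDesc.getAggregators().size():

import java.util.Arrays;

public class IdentityProjectionSketch {
  public static void main(String[] args) {
    final int size = 3; // hypothetical aggregate count
    int[] projectedOutputColumns = new int[size];
    // Fresh output batch, so no input columns are reused: identity mapping.
    for (int i = 0; i < size; ++i) {
      projectedOutputColumns[i] = i;
    }
    System.out.println(Arrays.toString(projectedOutputColumns)); // [0, 1, 2]
  }
}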
Use of org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.VectorAggregateExpression in project hive by apache.
The class Vectorizer, method validateAggregationDesc:
private Pair<Boolean, Boolean> validateAggregationDesc(AggregationDesc aggDesc, ProcessingMode processingMode, boolean hasKeys) {
  String udfName = aggDesc.getGenericUDAFName().toLowerCase();
  if (!supportedAggregationUdfs.contains(udfName)) {
    setExpressionIssue("Aggregation Function", "UDF " + udfName + " not supported");
    return new Pair<Boolean, Boolean>(false, false);
  }
  if (aggDesc.getParameters() != null &&
      !validateExprNodeDesc(aggDesc.getParameters(), "Aggregation Function UDF " + udfName + " parameter")) {
    return new Pair<Boolean, Boolean>(false, false);
  }
  // See if we can vectorize the aggregation.
  VectorizationContext vc = new ValidatorVectorizationContext(hiveConf);
  VectorAggregateExpression vectorAggrExpr;
  try {
    vectorAggrExpr = vc.getAggregatorExpression(aggDesc);
  } catch (Exception e) {
    // We should have already attempted to vectorize in validateAggregationDesc.
    if (LOG.isDebugEnabled()) {
      LOG.debug("Vectorization of aggregation should have succeeded ", e);
    }
    setExpressionIssue("Aggregation Function", "Vectorization of aggregation should have succeeded " + e);
    return new Pair<Boolean, Boolean>(false, false);
  }
  if (LOG.isDebugEnabled()) {
    LOG.debug("Aggregation " + aggDesc.getExprString() + " --> vector expression " + vectorAggrExpr.toString());
  }
  ObjectInspector.Category outputCategory = aggregationOutputCategory(vectorAggrExpr);
  boolean outputIsPrimitive = (outputCategory == ObjectInspector.Category.PRIMITIVE);
  if (processingMode == ProcessingMode.MERGE_PARTIAL && hasKeys && !outputIsPrimitive) {
    setOperatorIssue("Vectorized Reduce MergePartial GROUP BY keys can only handle aggregate outputs that are primitive types");
    return new Pair<Boolean, Boolean>(false, false);
  }
  return new Pair<Boolean, Boolean>(true, outputIsPrimitive);
}
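validateAggregationDesc validates by probing: it actually attempts to build the vectorized aggregate inside a try/catch and converts any failure into a recorded issue string plus a (valid, outputIsPrimitive) pair instead of letting the exception escape. A minimal sketch of that pattern, using java.util.Map.Entry in place of Hive's Pair; buildVectorAggregate and the issue wording are hypothetical:

import java.util.AbstractMap.SimpleImmutableEntry;
import java.util.Map;

public class ValidationSketch {
  static String lastIssue;

  // Stand-in for vc.getAggregatorExpression(aggDesc): succeeds only for "sum".
  static String buildVectorAggregate(String udfName) {
    if (!udfName.equalsIgnoreCase("sum")) {
      throw new IllegalArgumentException("not implemented: " + udfName);
    }
    return "VectorUDAFSumLong";
  }

  // Probe the builder; a failure becomes a recorded issue and (false, false).
  static Map.Entry<Boolean, Boolean> validate(String udfName, boolean outputIsPrimitive) {
    try {
      buildVectorAggregate(udfName);
    } catch (Exception e) {
      lastIssue = "Aggregation Function: vectorization failed: " + e.getMessage();
      return new SimpleImmutableEntry<>(false, false);
    }
    return new SimpleImmutableEntry<>(true, outputIsPrimitive);
  }

  public static void main(String[] args) {
    System.out.println(validate("sum", true)); // true=true
    System.out.println(validate("ndv", true)); // false=false
    System.out.println(lastIssue);
  }
}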