Search in sources :

Example 51 with ExprNodeDesc

use of org.apache.hadoop.hive.ql.plan.ExprNodeDesc in project hive by apache.

the class VectorMapJoinOperator method initializeOp.

@Override
public void initializeOp(Configuration hconf) throws HiveException {
    // Use a final variable to properly parameterize the processVectorInspector closure.
    // Using a member variable in the closure will not do the right thing...
    final int parameterizePosBigTable = conf.getPosBigTable();
    // Code borrowed from VectorReduceSinkOperator.initializeOp
    VectorExpressionWriterFactory.processVectorInspector((StructObjectInspector) inputObjInspectors[parameterizePosBigTable], new VectorExpressionWriterFactory.SingleOIDClosure() {

        @Override
        public void assign(VectorExpressionWriter[] writers, ObjectInspector objectInspector) {
            rowWriters = writers;
            inputObjInspectors[parameterizePosBigTable] = objectInspector;
        }
    });
    singleRow = new Object[rowWriters.length];
    super.initializeOp(hconf);
    List<ExprNodeDesc> keyDesc = conf.getKeys().get(posBigTable);
    keyOutputWriters = VectorExpressionWriterFactory.getExpressionWriters(keyDesc);
    keyWrapperBatch = VectorHashKeyWrapperBatch.compileKeyWrapperBatch(keyExpressions);
    Map<Byte, List<ExprNodeDesc>> valueExpressions = conf.getExprs();
    List<ExprNodeDesc> bigTableExpressions = valueExpressions.get(posBigTable);
    VectorExpressionWriterFactory.processVectorExpressions(bigTableExpressions, new VectorExpressionWriterFactory.ListOIDClosure() {

        @Override
        public void assign(VectorExpressionWriter[] writers, List<ObjectInspector> oids) {
            valueWriters = writers;
            joinValuesObjectInspectors[posBigTable] = oids;
        }
    });
    // We're hijacking the big table evaluators an replace them with our own custom ones
    // which are going to return values from the input batch vector expressions
    List<ExprNodeEvaluator> vectorNodeEvaluators = new ArrayList<ExprNodeEvaluator>(bigTableExpressions.size());
    for (int i = 0; i < bigTableExpressions.size(); ++i) {
        ExprNodeDesc desc = bigTableExpressions.get(i);
        VectorExpression vectorExpr = bigTableValueExpressions[i];
        // This is a vectorized aware evaluator
        ExprNodeEvaluator eval = new ExprNodeEvaluator<ExprNodeDesc>(desc, hconf) {

            int columnIndex;

            int writerIndex;

            public ExprNodeEvaluator initVectorExpr(int columnIndex, int writerIndex) {
                this.columnIndex = columnIndex;
                this.writerIndex = writerIndex;
                return this;
            }

            @Override
            public ObjectInspector initialize(ObjectInspector rowInspector) throws HiveException {
                throw new HiveException("should never reach here");
            }

            @Override
            protected Object _evaluate(Object row, int version) throws HiveException {
                VectorizedRowBatch inBatch = (VectorizedRowBatch) row;
                int rowIndex = inBatch.selectedInUse ? inBatch.selected[batchIndex] : batchIndex;
                return valueWriters[writerIndex].writeValue(inBatch.cols[columnIndex], rowIndex);
            }
        }.initVectorExpr(vectorExpr.getOutputColumn(), i);
        vectorNodeEvaluators.add(eval);
    }
    // Now replace the old evaluators with our own
    joinValues[posBigTable] = vectorNodeEvaluators;
    // Filtering is handled in the input batch processing
    if (filterMaps != null) {
        filterMaps[posBigTable] = null;
    }
}
Also used : StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector) ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) ArrayList(java.util.ArrayList) ExprNodeEvaluator(org.apache.hadoop.hive.ql.exec.ExprNodeEvaluator) VectorExpressionWriter(org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpressionWriter) VectorExpressionWriterFactory(org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpressionWriterFactory) ArrayList(java.util.ArrayList) List(java.util.List) VectorExpression(org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc)

Example 52 with ExprNodeDesc

use of org.apache.hadoop.hive.ql.plan.ExprNodeDesc in project hive by apache.

the class VectorSMBMapJoinOperator method initializeOp.

@Override
protected void initializeOp(Configuration hconf) throws HiveException {
    super.initializeOp(hconf);
    vrbCtx = new VectorizedRowBatchCtx();
    vrbCtx.init((StructObjectInspector) this.outputObjInspector, vOutContext.getScratchColumnTypeNames());
    outputBatch = vrbCtx.createVectorizedRowBatch();
    keyWrapperBatch = VectorHashKeyWrapperBatch.compileKeyWrapperBatch(keyExpressions);
    outputVectorAssignRowMap = new HashMap<ObjectInspector, VectorAssignRow>();
    // This key evaluator translates from the vectorized VectorHashKeyWrapper format
    // into the row-mode MapJoinKey
    keyEvaluator = new SMBJoinKeyEvaluator() {

        private List<Object> key;

        public SMBJoinKeyEvaluator init() {
            key = new ArrayList<Object>();
            for (int i = 0; i < keyExpressions.length; ++i) {
                key.add(null);
            }
            return this;
        }

        @Override
        public List<Object> evaluate(VectorHashKeyWrapper kw) throws HiveException {
            for (int i = 0; i < keyExpressions.length; ++i) {
                key.set(i, keyWrapperBatch.getWritableKeyValue(kw, i, keyOutputWriters[i]));
            }
            return key;
        }

        ;
    }.init();
    Map<Byte, List<ExprNodeDesc>> valueExpressions = conf.getExprs();
    List<ExprNodeDesc> bigTableExpressions = valueExpressions.get(posBigTable);
    // We're hijacking the big table evaluators and replacing them with our own custom ones
    // which are going to return values from the input batch vector expressions
    List<ExprNodeEvaluator> vectorNodeEvaluators = new ArrayList<ExprNodeEvaluator>(bigTableExpressions.size());
    VectorExpressionWriterFactory.processVectorExpressions(bigTableExpressions, new VectorExpressionWriterFactory.ListOIDClosure() {

        @Override
        public void assign(VectorExpressionWriter[] writers, List<ObjectInspector> oids) {
            valueWriters = writers;
            joinValuesObjectInspectors[posBigTable] = oids;
        }
    });
    for (int i = 0; i < bigTableExpressions.size(); ++i) {
        ExprNodeDesc desc = bigTableExpressions.get(i);
        VectorExpression vectorExpr = bigTableValueExpressions[i];
        // This is a vectorized aware evaluator
        ExprNodeEvaluator eval = new ExprNodeEvaluator<ExprNodeDesc>(desc, hconf) {

            int columnIndex;

            ;

            int writerIndex;

            public ExprNodeEvaluator initVectorExpr(int columnIndex, int writerIndex) {
                this.columnIndex = columnIndex;
                this.writerIndex = writerIndex;
                return this;
            }

            @Override
            public ObjectInspector initialize(ObjectInspector rowInspector) throws HiveException {
                throw new HiveException("should never reach here");
            }

            @Override
            protected Object _evaluate(Object row, int version) throws HiveException {
                VectorizedRowBatch inBatch = (VectorizedRowBatch) row;
                int rowIndex = inBatch.selectedInUse ? inBatch.selected[batchIndex] : batchIndex;
                return valueWriters[writerIndex].writeValue(inBatch.cols[columnIndex], rowIndex);
            }
        }.initVectorExpr(vectorExpr.getOutputColumn(), i);
        vectorNodeEvaluators.add(eval);
    }
    // Now replace the old evaluators with our own
    joinValues[posBigTable] = vectorNodeEvaluators;
}
Also used : HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) ArrayList(java.util.ArrayList) VectorExpressionWriter(org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpressionWriter) ArrayList(java.util.ArrayList) List(java.util.List) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector) ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) ExprNodeEvaluator(org.apache.hadoop.hive.ql.exec.ExprNodeEvaluator) VectorExpressionWriterFactory(org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpressionWriterFactory) VectorExpression(org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression)

Example 53 with ExprNodeDesc

use of org.apache.hadoop.hive.ql.plan.ExprNodeDesc in project hive by apache.

the class VectorSelectOperator method initializeOp.

@Override
protected void initializeOp(Configuration hconf) throws HiveException {
    super.initializeOp(hconf);
    // Just forward the row as is
    if (conf.isSelStarNoCompute()) {
        return;
    }
    List<ObjectInspector> objectInspectors = new ArrayList<ObjectInspector>();
    List<ExprNodeDesc> colList = conf.getColList();
    valueWriters = VectorExpressionWriterFactory.getExpressionWriters(colList);
    for (VectorExpressionWriter vew : valueWriters) {
        objectInspectors.add(vew.getObjectInspector());
    }
    List<String> outputFieldNames = conf.getOutputColumnNames();
    outputObjInspector = ObjectInspectorFactory.getStandardStructObjectInspector(outputFieldNames, objectInspectors);
}
Also used : ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) ArrayList(java.util.ArrayList) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) VectorExpressionWriter(org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpressionWriter)

Example 54 with ExprNodeDesc

use of org.apache.hadoop.hive.ql.plan.ExprNodeDesc in project hive by apache.

the class VectorizationContext method getAggregatorExpression.

public VectorAggregateExpression getAggregatorExpression(AggregationDesc desc) throws HiveException {
    ArrayList<ExprNodeDesc> paramDescList = desc.getParameters();
    VectorExpression[] vectorParams = new VectorExpression[paramDescList.size()];
    for (int i = 0; i < paramDescList.size(); ++i) {
        ExprNodeDesc exprDesc = paramDescList.get(i);
        vectorParams[i] = this.getVectorExpression(exprDesc, VectorExpressionDescriptor.Mode.PROJECTION);
    }
    String aggregateName = desc.getGenericUDAFName();
    VectorExpressionDescriptor.ArgumentType inputType = VectorExpressionDescriptor.ArgumentType.NONE;
    if (paramDescList.size() > 0) {
        ExprNodeDesc inputExpr = paramDescList.get(0);
        inputType = VectorExpressionDescriptor.ArgumentType.fromHiveTypeName(inputExpr.getTypeString());
        if (inputType == VectorExpressionDescriptor.ArgumentType.NONE) {
            throw new HiveException("No vector argument type for Hive type name " + inputExpr.getTypeString());
        }
    }
    GenericUDAFEvaluator.Mode udafEvaluatorMode = desc.getMode();
    for (AggregateDefinition aggDef : aggregatesDefinition) {
        if (aggregateName.equalsIgnoreCase(aggDef.getName()) && ((aggDef.getType() == VectorExpressionDescriptor.ArgumentType.NONE && inputType == VectorExpressionDescriptor.ArgumentType.NONE) || (aggDef.getType().isSameTypeOrFamily(inputType)))) {
            // A null means all modes are ok.
            GenericUDAFEvaluator.Mode aggDefUdafEvaluatorMode = aggDef.getUdafEvaluatorMode();
            if (aggDefUdafEvaluatorMode != null && aggDefUdafEvaluatorMode != udafEvaluatorMode) {
                continue;
            }
            Class<? extends VectorAggregateExpression> aggClass = aggDef.getAggClass();
            try {
                Constructor<? extends VectorAggregateExpression> ctor = aggClass.getConstructor(VectorExpression.class);
                VectorAggregateExpression aggExpr = ctor.newInstance(vectorParams.length > 0 ? vectorParams[0] : null);
                aggExpr.init(desc);
                return aggExpr;
            } catch (Exception e) {
                throw new HiveException("Internal exception for vector aggregate : \"" + aggregateName + "\" for type: \"" + inputType + "", e);
            }
        }
    }
    throw new HiveException("Vector aggregate not implemented: \"" + aggregateName + "\" for type: \"" + inputType.name() + " (UDAF evaluator mode = " + (udafEvaluatorMode == null ? "NULL" : udafEvaluatorMode.name()) + ")");
}
Also used : HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) VectorAggregateExpression(org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.VectorAggregateExpression) VectorUDAFMaxString(org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen.VectorUDAFMaxString) VectorUDAFMinString(org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen.VectorUDAFMinString) SemanticException(org.apache.hadoop.hive.ql.parse.SemanticException) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) ArgumentType(org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor.ArgumentType) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) Mode(org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator.Mode)

Example 55 with ExprNodeDesc

use of org.apache.hadoop.hive.ql.plan.ExprNodeDesc in project hive by apache.

the class VectorizationContext method getStructInExpression.

private VectorExpression getStructInExpression(List<ExprNodeDesc> childExpr, ExprNodeDesc colExpr, TypeInfo colTypeInfo, List<ExprNodeDesc> inChildren, VectorExpressionDescriptor.Mode mode, TypeInfo returnType) throws HiveException {
    VectorExpression expr = null;
    StructTypeInfo structTypeInfo = (StructTypeInfo) colTypeInfo;
    ArrayList<TypeInfo> fieldTypeInfos = structTypeInfo.getAllStructFieldTypeInfos();
    final int fieldCount = fieldTypeInfos.size();
    ColumnVector.Type[] fieldVectorColumnTypes = new ColumnVector.Type[fieldCount];
    InConstantType[] fieldInConstantTypes = new InConstantType[fieldCount];
    for (int f = 0; f < fieldCount; f++) {
        TypeInfo fieldTypeInfo = fieldTypeInfos.get(f);
        // Only primitive fields supports for now.
        if (fieldTypeInfo.getCategory() != Category.PRIMITIVE) {
            return null;
        }
        // We are going to serialize using the 4 basic types.
        ColumnVector.Type fieldVectorColumnType = getColumnVectorTypeFromTypeInfo(fieldTypeInfo);
        fieldVectorColumnTypes[f] = fieldVectorColumnType;
        // We currently evaluate the IN (..) constants in special ways.
        PrimitiveCategory fieldPrimitiveCategory = ((PrimitiveTypeInfo) fieldTypeInfo).getPrimitiveCategory();
        InConstantType inConstantType = getInConstantTypeFromPrimitiveCategory(fieldPrimitiveCategory);
        fieldInConstantTypes[f] = inConstantType;
    }
    Output buffer = new Output();
    BinarySortableSerializeWrite binarySortableSerializeWrite = new BinarySortableSerializeWrite(fieldCount);
    final int inChildrenCount = inChildren.size();
    byte[][] serializedInChildren = new byte[inChildrenCount][];
    try {
        for (int i = 0; i < inChildrenCount; i++) {
            final ExprNodeDesc node = inChildren.get(i);
            final Object[] constants;
            if (node instanceof ExprNodeConstantDesc) {
                ExprNodeConstantDesc constNode = (ExprNodeConstantDesc) node;
                ConstantObjectInspector output = constNode.getWritableObjectInspector();
                constants = ((List<?>) output.getWritableConstantValue()).toArray();
            } else {
                ExprNodeGenericFuncDesc exprNode = (ExprNodeGenericFuncDesc) node;
                ExprNodeEvaluator<?> evaluator = ExprNodeEvaluatorFactory.get(exprNode);
                ObjectInspector output = evaluator.initialize(exprNode.getWritableObjectInspector());
                constants = (Object[]) evaluator.evaluate(null);
            }
            binarySortableSerializeWrite.set(buffer);
            for (int f = 0; f < fieldCount; f++) {
                Object constant = constants[f];
                if (constant == null) {
                    binarySortableSerializeWrite.writeNull();
                } else {
                    InConstantType inConstantType = fieldInConstantTypes[f];
                    switch(inConstantType) {
                        case STRING_FAMILY:
                            {
                                byte[] bytes;
                                if (constant instanceof Text) {
                                    Text text = (Text) constant;
                                    bytes = text.getBytes();
                                    binarySortableSerializeWrite.writeString(bytes, 0, text.getLength());
                                } else {
                                    throw new HiveException("Unexpected constant String type " + constant.getClass().getSimpleName());
                                }
                            }
                            break;
                        case INT_FAMILY:
                            {
                                long value;
                                if (constant instanceof IntWritable) {
                                    value = ((IntWritable) constant).get();
                                } else if (constant instanceof LongWritable) {
                                    value = ((LongWritable) constant).get();
                                } else {
                                    throw new HiveException("Unexpected constant Long type " + constant.getClass().getSimpleName());
                                }
                                binarySortableSerializeWrite.writeLong(value);
                            }
                            break;
                        case FLOAT_FAMILY:
                            {
                                double value;
                                if (constant instanceof DoubleWritable) {
                                    value = ((DoubleWritable) constant).get();
                                } else {
                                    throw new HiveException("Unexpected constant Double type " + constant.getClass().getSimpleName());
                                }
                                binarySortableSerializeWrite.writeDouble(value);
                            }
                            break;
                        // UNDONE...
                        case DATE:
                        case TIMESTAMP:
                        case DECIMAL:
                        default:
                            throw new RuntimeException("Unexpected IN constant type " + inConstantType.name());
                    }
                }
            }
            serializedInChildren[i] = Arrays.copyOfRange(buffer.getData(), 0, buffer.getLength());
        }
    } catch (Exception e) {
        throw new HiveException(e);
    }
    // Create a single child representing the scratch column where we will
    // generate the serialized keys of the batch.
    int scratchBytesCol = ocm.allocateOutputColumn(TypeInfoFactory.stringTypeInfo);
    Class<?> cl = (mode == VectorExpressionDescriptor.Mode.FILTER ? FilterStructColumnInList.class : StructColumnInList.class);
    expr = createVectorExpression(cl, null, VectorExpressionDescriptor.Mode.PROJECTION, returnType);
    ((IStringInExpr) expr).setInListValues(serializedInChildren);
    ((IStructInExpr) expr).setScratchBytesColumn(scratchBytesCol);
    ((IStructInExpr) expr).setStructColumnExprs(this, colExpr.getChildren(), fieldVectorColumnTypes);
    return expr;
}
Also used : HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) StructTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo) DoubleWritable(org.apache.hadoop.hive.serde2.io.DoubleWritable) BinarySortableSerializeWrite(org.apache.hadoop.hive.serde2.binarysortable.fast.BinarySortableSerializeWrite) PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo) Type(org.apache.hadoop.hive.ql.exec.vector.ColumnVector.Type) Output(org.apache.hadoop.hive.serde2.ByteStream.Output) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) LongWritable(org.apache.hadoop.io.LongWritable) PrimitiveCategory(org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory) IntWritable(org.apache.hadoop.io.IntWritable) ExprNodeConstantDesc(org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc) ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) ConstantObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ConstantObjectInspector) ExprNodeGenericFuncDesc(org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc) Text(org.apache.hadoop.io.Text) StructTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo) DecimalTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo) BaseCharTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.BaseCharTypeInfo) PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) SemanticException(org.apache.hadoop.hive.ql.parse.SemanticException) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) InputExpressionType(org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor.InputExpressionType) ArgumentType(org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor.ArgumentType) Type(org.apache.hadoop.hive.ql.exec.vector.ColumnVector.Type) ConstantObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ConstantObjectInspector)

Aggregations

ExprNodeDesc (org.apache.hadoop.hive.ql.plan.ExprNodeDesc)321 ArrayList (java.util.ArrayList)179 ExprNodeColumnDesc (org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc)146 ExprNodeGenericFuncDesc (org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc)110 ExprNodeConstantDesc (org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc)101 Test (org.junit.Test)74 ColumnInfo (org.apache.hadoop.hive.ql.exec.ColumnInfo)69 HashMap (java.util.HashMap)67 RowSchema (org.apache.hadoop.hive.ql.exec.RowSchema)57 ReduceSinkOperator (org.apache.hadoop.hive.ql.exec.ReduceSinkOperator)47 LinkedHashMap (java.util.LinkedHashMap)43 SelectOperator (org.apache.hadoop.hive.ql.exec.SelectOperator)42 List (java.util.List)40 Operator (org.apache.hadoop.hive.ql.exec.Operator)39 TypeInfo (org.apache.hadoop.hive.serde2.typeinfo.TypeInfo)35 GroupByOperator (org.apache.hadoop.hive.ql.exec.GroupByOperator)34 JoinOperator (org.apache.hadoop.hive.ql.exec.JoinOperator)34 TableScanOperator (org.apache.hadoop.hive.ql.exec.TableScanOperator)34 SemanticException (org.apache.hadoop.hive.ql.parse.SemanticException)33 VectorExpression (org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression)32