Search in sources :

Example 1 with VectorExpressionWriter

use of org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpressionWriter in project hive by apache.

In class MapJoinKey, method serializeVector:

/**
 * Serializes row to output for vectorized path.
 *
 * <p>For each key writer, its object inspector is collected and the writable value of the
 * matching key column is pulled from {@code keyWrapperBatch}. The gathered values and
 * inspectors are then passed, together with the sort/null-marker arguments, to
 * {@code serializeRow}, whose result is returned unchanged.
 *
 * @param byteStream Output to reuse. Can be null, in that case a new one would be created.
 * @param kw key wrapper the key values are read from
 * @param keyOutputWriters one writer per key column; its length fixes the number of keys
 * @param keyWrapperBatch batch used to obtain the writable value of each key column
 * @param nulls optional (may be null); when given, entry {@code i} is set to whether key
 *     {@code i} came back as {@code null}
 * @param sortableSortOrders forwarded as-is to {@code serializeRow}
 * @param nullMarkers forwarded as-is to {@code serializeRow}
 * @param notNullMarkers forwarded as-is to {@code serializeRow}
 * @return the value returned by {@code serializeRow}
 */
public static Output serializeVector(Output byteStream, VectorHashKeyWrapper kw, VectorExpressionWriter[] keyOutputWriters, VectorHashKeyWrapperBatch keyWrapperBatch, boolean[] nulls, boolean[] sortableSortOrders, byte[] nullMarkers, byte[] notNullMarkers) throws HiveException, SerDeException {
    final int keyCount = keyOutputWriters.length;
    final boolean trackNulls = (nulls != null);
    Object[] keyValues = new Object[keyCount];
    List<ObjectInspector> keyInspectors = new ArrayList<ObjectInspector>();
    for (int col = 0; col < keyCount; col++) {
        keyInspectors.add(keyOutputWriters[col].getObjectInspector());
        // This is rather convoluted... to simplify for perf, we could call getRawKeyValue
        // instead of writable, and serialize based on Java type as opposed to OI.
        Object value = keyWrapperBatch.getWritableKeyValue(kw, col, keyOutputWriters[col]);
        keyValues[col] = value;
        if (trackNulls) {
            nulls[col] = (value == null);
        }
    }
    return serializeRow(byteStream, keyValues, keyInspectors, sortableSortOrders, nullMarkers, notNullMarkers);
}
Also used : PrimitiveObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector) ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) ArrayList(java.util.ArrayList) VectorExpressionWriter(org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpressionWriter)

Example 2 with VectorExpressionWriter

use of org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpressionWriter in project hive by apache.

In class VectorMapJoinOperator, method initializeOp:

/**
 * Initializes the vectorized map-join operator.
 *
 * <p>In order, this method: obtains the row writers for the big-table input inspector,
 * runs the superclass initialization, builds the key writers and key wrapper batch,
 * obtains the value writers for the big-table value expressions, and finally replaces
 * the big-table join-value evaluators with evaluators that read from the input batch.
 *
 * @param hconf configuration passed to the superclass and to each replacement evaluator
 * @throws HiveException propagated from the superclass or the writer factory calls
 */
@Override
public void initializeOp(Configuration hconf) throws HiveException {
    // Use a final variable to properly parameterize the processVectorInspector closure.
    // Using a member variable in the closure will not do the right thing...
    final int parameterizePosBigTable = conf.getPosBigTable();
    // Code borrowed from VectorReduceSinkOperator.initializeOp
    VectorExpressionWriterFactory.processVectorInspector((StructObjectInspector) inputObjInspectors[parameterizePosBigTable], new VectorExpressionWriterFactory.SingleOIDClosure() {

        @Override
        public void assign(VectorExpressionWriter[] writers, ObjectInspector objectInspector) {
            // Keep the writers and swap the big-table input inspector for the one handed back.
            rowWriters = writers;
            inputObjInspectors[parameterizePosBigTable] = objectInspector;
        }
    });
    // Sized from rowWriters, so this relies on assign() above having already run.
    singleRow = new Object[rowWriters.length];
    // The superclass is initialized only after the big-table input inspector was replaced above.
    super.initializeOp(hconf);
    List<ExprNodeDesc> keyDesc = conf.getKeys().get(posBigTable);
    keyOutputWriters = VectorExpressionWriterFactory.getExpressionWriters(keyDesc);
    keyWrapperBatch = VectorHashKeyWrapperBatch.compileKeyWrapperBatch(keyExpressions);
    Map<Byte, List<ExprNodeDesc>> valueExpressions = conf.getExprs();
    List<ExprNodeDesc> bigTableExpressions = valueExpressions.get(posBigTable);
    VectorExpressionWriterFactory.processVectorExpressions(bigTableExpressions, new VectorExpressionWriterFactory.ListOIDClosure() {

        @Override
        public void assign(VectorExpressionWriter[] writers, List<ObjectInspector> oids) {
            // Record the value writers and the matching inspectors for the big table.
            valueWriters = writers;
            joinValuesObjectInspectors[posBigTable] = oids;
        }
    });
    // We're hijacking the big table evaluators and replacing them with our own custom ones
    // which are going to return values from the input batch vector expressions
    List<ExprNodeEvaluator> vectorNodeEvaluators = new ArrayList<ExprNodeEvaluator>(bigTableExpressions.size());
    for (int i = 0; i < bigTableExpressions.size(); ++i) {
        ExprNodeDesc desc = bigTableExpressions.get(i);
        // Expression i and vector expression i are paired by position.
        VectorExpression vectorExpr = bigTableValueExpressions[i];
        // This is a vectorized aware evaluator
        ExprNodeEvaluator eval = new ExprNodeEvaluator<ExprNodeDesc>(desc, hconf) {

            // Index into the batch's cols array that this evaluator reads from.
            int columnIndex;

            // Index into valueWriters of the writer used to produce the value.
            int writerIndex;

            // Anonymous classes cannot declare constructors, so the indexes are injected here.
            public ExprNodeEvaluator initVectorExpr(int columnIndex, int writerIndex) {
                this.columnIndex = columnIndex;
                this.writerIndex = writerIndex;
                return this;
            }

            @Override
            public ObjectInspector initialize(ObjectInspector rowInspector) throws HiveException {
                // This evaluator is not meant to be initialized through the regular path.
                throw new HiveException("should never reach here");
            }

            @Override
            protected Object _evaluate(Object row, int version) throws HiveException {
                // The "row" handed in is the whole input batch.
                VectorizedRowBatch inBatch = (VectorizedRowBatch) row;
                // batchIndex is not declared in this method; presumably a field of the
                // enclosing operator naming the current logical row - TODO confirm.
                int rowIndex = inBatch.selectedInUse ? inBatch.selected[batchIndex] : batchIndex;
                return valueWriters[writerIndex].writeValue(inBatch.cols[columnIndex], rowIndex);
            }
        }.initVectorExpr(vectorExpr.getOutputColumn(), i);
        vectorNodeEvaluators.add(eval);
    }
    // Now replace the old evaluators with our own
    joinValues[posBigTable] = vectorNodeEvaluators;
    // Filtering is handled in the input batch processing
    if (filterMaps != null) {
        filterMaps[posBigTable] = null;
    }
}
Also used : StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector) ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) ArrayList(java.util.ArrayList) ExprNodeEvaluator(org.apache.hadoop.hive.ql.exec.ExprNodeEvaluator) VectorExpressionWriter(org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpressionWriter) VectorExpressionWriterFactory(org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpressionWriterFactory) ArrayList(java.util.ArrayList) List(java.util.List) VectorExpression(org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc)

Example 3 with VectorExpressionWriter

use of org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpressionWriter in project hive by apache.

In class VectorSMBMapJoinOperator, method initializeOp:

/**
 * Initializes the vectorized SMB map-join operator.
 *
 * <p>After the superclass initialization this creates the output batch, compiles the key
 * wrapper batch, installs a key evaluator that turns a {@code VectorHashKeyWrapper} into a
 * list of writable key values, obtains the big-table value writers, and replaces the
 * big-table join-value evaluators with ones that read straight from the input batch.
 *
 * @param hconf configuration passed to the superclass and to each replacement evaluator
 * @throws HiveException propagated from the superclass or the writer factory calls
 */
@Override
protected void initializeOp(Configuration hconf) throws HiveException {
    super.initializeOp(hconf);

    // Output batch, created from the output inspector and the scratch column type names.
    vrbCtx = new VectorizedRowBatchCtx();
    vrbCtx.init((StructObjectInspector) this.outputObjInspector, vOutContext.getScratchColumnTypeNames());
    outputBatch = vrbCtx.createVectorizedRowBatch();

    keyWrapperBatch = VectorHashKeyWrapperBatch.compileKeyWrapperBatch(keyExpressions);
    outputVectorAssignRowMap = new HashMap<ObjectInspector, VectorAssignRow>();

    // This key evaluator translates from the vectorized VectorHashKeyWrapper format
    // into the row-mode MapJoinKey
    keyEvaluator = new SMBJoinKeyEvaluator() {

        // Reused across calls: evaluate() overwrites every slot and returns this same list.
        private List<Object> key;

        public SMBJoinKeyEvaluator init() {
            key = new ArrayList<Object>();
            // One placeholder per key expression so evaluate() can use set().
            for (int remaining = keyExpressions.length; remaining > 0; --remaining) {
                key.add(null);
            }
            return this;
        }

        @Override
        public List<Object> evaluate(VectorHashKeyWrapper kw) throws HiveException {
            for (int slot = 0; slot < keyExpressions.length; slot++) {
                Object writable = keyWrapperBatch.getWritableKeyValue(kw, slot, keyOutputWriters[slot]);
                key.set(slot, writable);
            }
            return key;
        }
    }.init();

    List<ExprNodeDesc> bigTableExpressions = conf.getExprs().get(posBigTable);
    VectorExpressionWriterFactory.processVectorExpressions(bigTableExpressions, new VectorExpressionWriterFactory.ListOIDClosure() {

        @Override
        public void assign(VectorExpressionWriter[] writers, List<ObjectInspector> oids) {
            // Record the value writers and the matching inspectors for the big table.
            valueWriters = writers;
            joinValuesObjectInspectors[posBigTable] = oids;
        }
    });

    // We're hijacking the big table evaluators and replacing them with our own custom ones
    // which are going to return values from the input batch vector expressions
    final int expressionCount = bigTableExpressions.size();
    List<ExprNodeEvaluator> batchEvaluators = new ArrayList<ExprNodeEvaluator>(expressionCount);
    for (int pos = 0; pos < bigTableExpressions.size(); pos++) {
        ExprNodeDesc expression = bigTableExpressions.get(pos);
        VectorExpression vectorized = bigTableValueExpressions[pos];
        // This is a vectorized aware evaluator
        ExprNodeEvaluator batchEvaluator = new ExprNodeEvaluator<ExprNodeDesc>(expression, hconf) {

            // Index into the batch's cols array that this evaluator reads from.
            int sourceColumn;

            // Index into valueWriters of the writer used to produce the value.
            int writerSlot;

            // Anonymous classes cannot declare constructors, so the indexes are injected here.
            public ExprNodeEvaluator initVectorExpr(int column, int slot) {
                sourceColumn = column;
                writerSlot = slot;
                return this;
            }

            @Override
            public ObjectInspector initialize(ObjectInspector rowInspector) throws HiveException {
                throw new HiveException("should never reach here");
            }

            @Override
            protected Object _evaluate(Object row, int version) throws HiveException {
                // The "row" handed in is the whole input batch.
                VectorizedRowBatch batch = (VectorizedRowBatch) row;
                int rowIndex;
                if (batch.selectedInUse) {
                    rowIndex = batch.selected[batchIndex];
                } else {
                    rowIndex = batchIndex;
                }
                return valueWriters[writerSlot].writeValue(batch.cols[sourceColumn], rowIndex);
            }
        }.initVectorExpr(vectorized.getOutputColumn(), pos);
        batchEvaluators.add(batchEvaluator);
    }
    // Now replace the old evaluators with our own
    joinValues[posBigTable] = batchEvaluators;
}
Also used : HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) ArrayList(java.util.ArrayList) VectorExpressionWriter(org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpressionWriter) ArrayList(java.util.ArrayList) List(java.util.List) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector) ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) ExprNodeEvaluator(org.apache.hadoop.hive.ql.exec.ExprNodeEvaluator) VectorExpressionWriterFactory(org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpressionWriterFactory) VectorExpression(org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression)

Example 4 with VectorExpressionWriter

use of org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpressionWriter in project hive by apache.

In class VectorSelectOperator, method initializeOp:

/**
 * Initializes the vectorized select operator.
 *
 * <p>Unless the descriptor reports {@code isSelStarNoCompute()}, this creates one value
 * writer per select-list expression and builds the output struct inspector from the
 * writers' object inspectors and the configured output column names.
 *
 * @param hconf configuration passed to the superclass initialization
 * @throws HiveException propagated from the superclass or the writer factory
 */
@Override
protected void initializeOp(Configuration hconf) throws HiveException {
    super.initializeOp(hconf);
    if (conf.isSelStarNoCompute()) {
        // Just forward the row as is
        return;
    }
    List<ObjectInspector> columnInspectors = new ArrayList<ObjectInspector>();
    valueWriters = VectorExpressionWriterFactory.getExpressionWriters(conf.getColList());
    for (VectorExpressionWriter columnWriter : valueWriters) {
        ObjectInspector inspector = columnWriter.getObjectInspector();
        columnInspectors.add(inspector);
    }
    outputObjInspector = ObjectInspectorFactory.getStandardStructObjectInspector(conf.getOutputColumnNames(), columnInspectors);
}
Also used : ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) ArrayList(java.util.ArrayList) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) VectorExpressionWriter(org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpressionWriter)

Example 5 with VectorExpressionWriter

use of org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpressionWriter in project hive by apache.

In class SparkReduceRecordHandler, method init:

/**
 * Initializes the reduce-side record handler.
 *
 * <p>Steps, in order: run the superclass init; read the reduce work from the job; create
 * and initialize the key deserializer and one value deserializer per tag; build a row
 * object inspector per tag (a vectorized variant when the work is in vector mode);
 * hand an execution context to the reducer; and initialize the reducer operator tree
 * plus any dummy parent operators of the local work.
 *
 * <p>The whole method is bracketed by PerfLogBegin/PerfLogEnd for SPARK_INIT_OPERATORS.
 * PerfLogEnd is the last statement, so it is not reached when an exception is thrown.
 *
 * @param job job configuration the reduce work is read from
 * @param output collector installed on the reducer via OperatorUtils.setChildrenCollector
 * @param reporter forwarded to the superclass init
 * @throws Exception declared by the signature; failures in the two try blocks below are
 *     rethrown as RuntimeException (or as the original OutOfMemoryError)
 */
@Override
@SuppressWarnings("unchecked")
public void init(JobConf job, OutputCollector output, Reporter reporter) throws Exception {
    perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.SPARK_INIT_OPERATORS);
    super.init(job, output, reporter);
    // Both inspector arrays are indexed by tag and sized to Byte.MAX_VALUE.
    rowObjectInspector = new ObjectInspector[Byte.MAX_VALUE];
    ObjectInspector[] valueObjectInspector = new ObjectInspector[Byte.MAX_VALUE];
    ObjectInspector keyObjectInspector;
    ReduceWork gWork = Utilities.getReduceWork(job);
    reducer = gWork.getReducer();
    vectorized = gWork.getVectorMode();
    // clear out any parents as reducer is the root
    reducer.setParentOperators(null);
    isTagged = gWork.getNeedsTagging();
    try {
        // Key side: instantiate the deserializer class named by the key descriptor and
        // initialize it with the descriptor's properties.
        keyTableDesc = gWork.getKeyDesc();
        inputKeyDeserializer = ReflectionUtils.newInstance(keyTableDesc.getDeserializerClass(), null);
        SerDeUtils.initializeSerDe(inputKeyDeserializer, null, keyTableDesc.getProperties(), null);
        keyObjectInspector = inputKeyDeserializer.getObjectInspector();
        valueTableDesc = new TableDesc[gWork.getTagToValueDesc().size()];
        if (vectorized) {
            // Per-tag state that is only needed in vector mode.
            final int maxTags = gWork.getTagToValueDesc().size();
            keyStructInspector = (StructObjectInspector) keyObjectInspector;
            batches = new VectorizedRowBatch[maxTags];
            valueStructInspectors = new StructObjectInspector[maxTags];
            // Raw List[] array creation; presumably the reason for @SuppressWarnings("unchecked").
            valueStringWriters = new List[maxTags];
            // Value columns are placed after the key columns, so remember how many keys there are.
            keysColumnOffset = keyStructInspector.getAllStructFieldRefs().size();
            buffer = new DataOutputBuffer();
        }
        for (int tag = 0; tag < gWork.getTagToValueDesc().size(); tag++) {
            // We should initialize the SerDe with the TypeInfo when available.
            valueTableDesc[tag] = gWork.getTagToValueDesc().get(tag);
            // inputValueDeserializer is not allocated in this method; presumably a
            // pre-sized member array - TODO confirm.
            inputValueDeserializer[tag] = ReflectionUtils.newInstance(valueTableDesc[tag].getDeserializerClass(), null);
            SerDeUtils.initializeSerDe(inputValueDeserializer[tag], null, valueTableDesc[tag].getProperties(), null);
            valueObjectInspector[tag] = inputValueDeserializer[tag].getObjectInspector();
            // Only filled in the non-vectorized branch below.
            ArrayList<ObjectInspector> ois = new ArrayList<ObjectInspector>();
            if (vectorized) {
                /* vectorization only works with struct object inspectors */
                valueStructInspectors[tag] = (StructObjectInspector) valueObjectInspector[tag];
                final int totalColumns = keysColumnOffset + valueStructInspectors[tag].getAllStructFieldRefs().size();
                // Writers for the key struct first, then the writers for this tag's value struct.
                valueStringWriters[tag] = new ArrayList<VectorExpressionWriter>(totalColumns);
                valueStringWriters[tag].addAll(Arrays.asList(VectorExpressionWriterFactory.genVectorStructExpressionWritables(keyStructInspector)));
                valueStringWriters[tag].addAll(Arrays.asList(VectorExpressionWriterFactory.genVectorStructExpressionWritables(valueStructInspectors[tag])));
                rowObjectInspector[tag] = Utilities.constructVectorizedReduceRowOI(keyStructInspector, valueStructInspectors[tag]);
                batches[tag] = gWork.getVectorizedRowBatchCtx().createVectorizedRowBatch();
            } else {
                // Row mode: the row inspector is a two-field struct of (key, value),
                // named by Utilities.reduceFieldNameList.
                ois.add(keyObjectInspector);
                ois.add(valueObjectInspector[tag]);
                //reducer.setGroupKeyObjectInspector(keyObjectInspector);
                rowObjectInspector[tag] = ObjectInspectorFactory.getStandardStructObjectInspector(Utilities.reduceFieldNameList, ois);
            }
        }
    } catch (Exception e) {
        // Any failure while setting up deserializers/inspectors is rethrown unchecked,
        // with the original exception as the cause.
        throw new RuntimeException(e);
    }
    // Give the reducer an execution context carrying the job conf (jc) and the local work.
    ExecMapperContext execContext = new ExecMapperContext(job);
    localWork = gWork.getMapRedLocalWork();
    execContext.setJc(jc);
    execContext.setLocalWork(localWork);
    reducer.passExecContext(execContext);
    reducer.setReporter(rp);
    OperatorUtils.setChildrenCollector(Arrays.<Operator<? extends OperatorDesc>>asList(reducer), output);
    // initialize reduce operator tree
    try {
        LOG.info(reducer.dump(0));
        reducer.initialize(jc, rowObjectInspector);
        if (localWork != null) {
            // Dummy parent operators of the local work get the same context and are
            // initialized with no input inspectors.
            for (Operator<? extends OperatorDesc> dummyOp : localWork.getDummyParentOp()) {
                dummyOp.setExecContext(execContext);
                dummyOp.initialize(jc, null);
            }
        }
    } catch (Throwable e) {
        // Flag the handler as aborted before propagating the failure.
        abort = true;
        if (e instanceof OutOfMemoryError) {
            // Don't create a new object if we are already out of memory
            throw (OutOfMemoryError) e;
        } else {
            throw new RuntimeException("Reduce operator initialization failed", e);
        }
    }
    perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.SPARK_INIT_OPERATORS);
}
Also used : ExecMapperContext(org.apache.hadoop.hive.ql.exec.mr.ExecMapperContext) ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector) ArrayList(java.util.ArrayList) ReduceWork(org.apache.hadoop.hive.ql.plan.ReduceWork) VectorExpressionWriter(org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpressionWriter) IOException(java.io.IOException) SerDeException(org.apache.hadoop.hive.serde2.SerDeException) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) DataOutputBuffer(org.apache.hadoop.io.DataOutputBuffer)

Aggregations

ArrayList (java.util.ArrayList)6 VectorExpressionWriter (org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpressionWriter)6 ObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector)6 StructObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector)4 HiveException (org.apache.hadoop.hive.ql.metadata.HiveException)3 ExprNodeDesc (org.apache.hadoop.hive.ql.plan.ExprNodeDesc)3 List (java.util.List)2 ExprNodeEvaluator (org.apache.hadoop.hive.ql.exec.ExprNodeEvaluator)2 VectorExpression (org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression)2 VectorExpressionWriterFactory (org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpressionWriterFactory)2 IOException (java.io.IOException)1 ExecMapperContext (org.apache.hadoop.hive.ql.exec.mr.ExecMapperContext)1 BytesColumnVector (org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector)1 ColumnVector (org.apache.hadoop.hive.ql.exec.vector.ColumnVector)1 ReduceWork (org.apache.hadoop.hive.ql.plan.ReduceWork)1 SerDeException (org.apache.hadoop.hive.serde2.SerDeException)1 BinarySortableSerDe (org.apache.hadoop.hive.serde2.binarysortable.BinarySortableSerDe)1 BinarySortableDeserializeRead (org.apache.hadoop.hive.serde2.binarysortable.fast.BinarySortableDeserializeRead)1 LazyBinaryDeserializeRead (org.apache.hadoop.hive.serde2.lazybinary.fast.LazyBinaryDeserializeRead)1 PrimitiveObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector)1