Example 46 with VectorExpression

Use of org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression in project hive by apache.

From class VectorMapJoinInnerGenerateResultOperator, method finishInnerRepeated.

protected void finishInnerRepeated(VectorizedRowBatch batch, JoinUtil.JoinResult joinResult, VectorMapJoinHashTableResult hashMapResult) throws HiveException, IOException {
    switch (joinResult) {
        case MATCH:
            if (bigTableValueExpressions != null) {
                // Run our value expressions over the whole batch.
                for (VectorExpression ve : bigTableValueExpressions) {
                    ve.evaluate(batch);
                }
            }
            // Generate special repeated case.
            generateHashMapResultRepeatedAll(batch, hashMapResults[0]);
            break;
        case SPILL:
            // Whole batch is spilled.
            spillBatchRepeated(batch, (VectorMapJoinHashTableResult) hashMapResults[0]);
            batch.size = 0;
            break;
        case NOMATCH:
            // No match for entire batch.
            batch.size = 0;
            break;
    }
}
Also used: VectorExpression (org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression)
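
The MATCH arm above relies on a core vectorization idiom: each compiled VectorExpression was bound to a scratch output column at compile time, so a single evaluate() call per expression covers the whole batch. A minimal standalone sketch of that idiom (not Hive source; the class and method names are invented for illustration, and Hive's exec jars are assumed on the classpath):

import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression;
import org.apache.hadoop.hive.ql.metadata.HiveException;

public class BatchEvaluator {

    // Evaluates each compiled expression once over the batch; results land in
    // the scratch columns the expressions were bound to at compile time.
    static void evaluateAll(VectorExpression[] expressions, VectorizedRowBatch batch)
            throws HiveException {
        if (expressions == null || batch.size == 0) {
            // Nothing compiled, or an empty batch: nothing to evaluate.
            return;
        }
        for (VectorExpression ve : expressions) {
            ve.evaluate(batch);
        }
    }
}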

Example 47 with VectorExpression

Use of org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression in project hive by apache.

From class VectorReduceSinkCommonOperator, method process.

@Override
public void process(Object row, int tag) throws HiveException {
    try {
        VectorizedRowBatch batch = (VectorizedRowBatch) row;
        batchCounter++;
        if (batch.size == 0) {
            if (LOG.isDebugEnabled()) {
                LOG.debug(CLASS_NAME + " batch #" + batchCounter + " empty");
            }
            return;
        }
        // Perform any key expressions.  Results will go into scratch columns.
        if (reduceSinkKeyExpressions != null) {
            for (VectorExpression ve : reduceSinkKeyExpressions) {
                ve.evaluate(batch);
            }
        }
        // Perform any value expressions.  Results will go into scratch columns.
        if (reduceSinkValueExpressions != null) {
            for (VectorExpression ve : reduceSinkValueExpressions) {
                ve.evaluate(batch);
            }
        }
        serializedKeySeries.processBatch(batch);
        boolean selectedInUse = batch.selectedInUse;
        int[] selected = batch.selected;
        int keyLength;
        int logical;
        int end;
        int batchIndex;
        do {
            if (serializedKeySeries.getCurrentIsAllNull()) {
                if (tag == -1 || reduceSkipTag) {
                    keyWritable.set(nullBytes, 0, nullBytes.length);
                } else {
                    keyWritable.setSize(nullBytes.length + 1);
                    System.arraycopy(nullBytes, 0, keyWritable.get(), 0, nullBytes.length);
                    keyWritable.get()[nullBytes.length] = reduceTagByte;
                }
                keyWritable.setDistKeyLength(nullBytes.length);
                keyWritable.setHashCode(nullKeyHashCode);
            } else {
                // One serialized key covers one or more rows that share duplicate keys.
                // LOG.info("reduceSkipTag " + reduceSkipTag + " tag " + tag + " reduceTagByte " + (int) reduceTagByte + " keyLength " + serializedKeySeries.getSerializedLength());
                // LOG.info("process offset " + serializedKeySeries.getSerializedStart() + " length " + serializedKeySeries.getSerializedLength());
                keyLength = serializedKeySeries.getSerializedLength();
                if (tag == -1 || reduceSkipTag) {
                    keyWritable.set(serializedKeySeries.getSerializedBytes(), serializedKeySeries.getSerializedStart(), keyLength);
                } else {
                    keyWritable.setSize(keyLength + 1);
                    System.arraycopy(serializedKeySeries.getSerializedBytes(), serializedKeySeries.getSerializedStart(), keyWritable.get(), 0, keyLength);
                    keyWritable.get()[keyLength] = reduceTagByte;
                }
                keyWritable.setDistKeyLength(keyLength);
                keyWritable.setHashCode(serializedKeySeries.getCurrentHashCode());
            }
            logical = serializedKeySeries.getCurrentLogical();
            end = logical + serializedKeySeries.getCurrentDuplicateCount();
            do {
                batchIndex = (selectedInUse ? selected[logical] : logical);
                valueLazyBinarySerializeWrite.reset();
                valueVectorSerializeRow.serializeWrite(batch, batchIndex);
                valueBytesWritable.set(valueOutput.getData(), 0, valueOutput.getLength());
                collect(keyWritable, valueBytesWritable);
            } while (++logical < end);
            if (!serializedKeySeries.next()) {
                break;
            }
        } while (true);
    } catch (Exception e) {
        throw new HiveException(e);
    }
}
Also used: VectorizedRowBatch (org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch) HiveException (org.apache.hadoop.hive.ql.metadata.HiveException) VectorExpression (org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression) IOException (java.io.IOException)
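
The inner do/while above resolves a logical position in the key series to a physical row through batch.selected. A hedged, standalone illustration of that mapping (the class and helper names are ours, not Hive's):

import java.util.function.IntConsumer;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;

public class BatchRows {

    // Visits every live row of a batch. When selectedInUse is true,
    // batch.selected holds the physical indices of the surviving rows;
    // otherwise logical and physical positions coincide. This is the same
    // mapping the operator applies with "selectedInUse ? selected[logical] : logical".
    static void forEachLiveRow(VectorizedRowBatch batch, IntConsumer visitor) {
        boolean selectedInUse = batch.selectedInUse;
        int[] selected = batch.selected;
        for (int logical = 0; logical < batch.size; logical++) {
            int batchIndex = selectedInUse ? selected[logical] : logical;
            visitor.accept(batchIndex);
        }
    }
}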

Example 48 with VectorExpression

Use of org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression in project hive by apache.

From class Vectorizer, method canSpecializeReduceSink.

private boolean canSpecializeReduceSink(ReduceSinkDesc desc, boolean isTezOrSpark, VectorizationContext vContext, VectorReduceSinkInfo vectorReduceSinkInfo) throws HiveException {
    // Allocate a VectorReduceSinkDesc initially with key type NONE so EXPLAIN can report that
    // this operator was vectorized, but not native, and record the conditions that were checked.
    VectorReduceSinkDesc vectorDesc = new VectorReduceSinkDesc();
    desc.setVectorDesc(vectorDesc);
    boolean isVectorizationReduceSinkNativeEnabled = HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVE_VECTORIZATION_REDUCESINK_NEW_ENABLED);
    String engine = HiveConf.getVar(hiveConf, HiveConf.ConfVars.HIVE_EXECUTION_ENGINE);
    boolean hasBuckets = desc.getBucketCols() != null && !desc.getBucketCols().isEmpty();
    boolean hasTopN = desc.getTopN() >= 0;
    boolean useUniformHash = desc.getReducerTraits().contains(UNIFORM);
    boolean hasDistinctColumns = desc.getDistinctColumnIndices().size() > 0;
    TableDesc keyTableDesc = desc.getKeySerializeInfo();
    Class<? extends Deserializer> keySerializerClass = keyTableDesc.getDeserializerClass();
    boolean isKeyBinarySortable = (keySerializerClass == org.apache.hadoop.hive.serde2.binarysortable.BinarySortableSerDe.class);
    TableDesc valueTableDesc = desc.getValueSerializeInfo();
    Class<? extends Deserializer> valueDeserializerClass = valueTableDesc.getDeserializerClass();
    boolean isValueLazyBinary = (valueDeserializerClass == org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe.class);
    // Remember the condition variables for EXPLAIN regardless.
    vectorDesc.setIsVectorizationReduceSinkNativeEnabled(isVectorizationReduceSinkNativeEnabled);
    vectorDesc.setEngine(engine);
    vectorDesc.setHasBuckets(hasBuckets);
    vectorDesc.setHasTopN(hasTopN);
    vectorDesc.setUseUniformHash(useUniformHash);
    vectorDesc.setHasDistinctColumns(hasDistinctColumns);
    vectorDesc.setIsKeyBinarySortable(isKeyBinarySortable);
    vectorDesc.setIsValueLazyBinary(isValueLazyBinary);
    // Many restrictions.
    if (!isVectorizationReduceSinkNativeEnabled || !isTezOrSpark || hasBuckets || hasTopN || !useUniformHash || hasDistinctColumns || !isKeyBinarySortable || !isValueLazyBinary) {
        return false;
    }
    // We are doing work here we'd normally do in VectorReduceSinkCommonOperator's constructor.
    // So if we later decide not to specialize, we'll just waste any scratch columns allocated...
    List<ExprNodeDesc> keysDescs = desc.getKeyCols();
    VectorExpression[] allKeyExpressions = vContext.getVectorExpressions(keysDescs);
    // Since a key expression can be a calculation and the key will go into a scratch column,
    // we need the mapping and type information.
    int[] reduceSinkKeyColumnMap = new int[allKeyExpressions.length];
    TypeInfo[] reduceSinkKeyTypeInfos = new TypeInfo[allKeyExpressions.length];
    Type[] reduceSinkKeyColumnVectorTypes = new Type[allKeyExpressions.length];
    ArrayList<VectorExpression> reduceSinkKeyExpressionsList = new ArrayList<VectorExpression>();
    VectorExpression[] reduceSinkKeyExpressions;
    for (int i = 0; i < reduceSinkKeyColumnMap.length; i++) {
        VectorExpression ve = allKeyExpressions[i];
        reduceSinkKeyColumnMap[i] = ve.getOutputColumn();
        reduceSinkKeyTypeInfos[i] = keysDescs.get(i).getTypeInfo();
        reduceSinkKeyColumnVectorTypes[i] = VectorizationContext.getColumnVectorTypeFromTypeInfo(reduceSinkKeyTypeInfos[i]);
        if (!IdentityExpression.isColumnOnly(ve)) {
            reduceSinkKeyExpressionsList.add(ve);
        }
    }
    if (reduceSinkKeyExpressionsList.size() == 0) {
        reduceSinkKeyExpressions = null;
    } else {
        reduceSinkKeyExpressions = reduceSinkKeyExpressionsList.toArray(new VectorExpression[0]);
    }
    ArrayList<ExprNodeDesc> valueDescs = desc.getValueCols();
    VectorExpression[] allValueExpressions = vContext.getVectorExpressions(valueDescs);
    int[] reduceSinkValueColumnMap = new int[valueDescs.size()];
    TypeInfo[] reduceSinkValueTypeInfos = new TypeInfo[valueDescs.size()];
    Type[] reduceSinkValueColumnVectorTypes = new Type[valueDescs.size()];
    ArrayList<VectorExpression> reduceSinkValueExpressionsList = new ArrayList<VectorExpression>();
    VectorExpression[] reduceSinkValueExpressions;
    for (int i = 0; i < valueDescs.size(); ++i) {
        VectorExpression ve = allValueExpressions[i];
        reduceSinkValueColumnMap[i] = ve.getOutputColumn();
        reduceSinkValueTypeInfos[i] = valueDescs.get(i).getTypeInfo();
        reduceSinkValueColumnVectorTypes[i] = VectorizationContext.getColumnVectorTypeFromTypeInfo(reduceSinkValueTypeInfos[i]);
        if (!IdentityExpression.isColumnOnly(ve)) {
            reduceSinkValueExpressionsList.add(ve);
        }
    }
    if (reduceSinkValueExpressionsList.size() == 0) {
        reduceSinkValueExpressions = null;
    } else {
        reduceSinkValueExpressions = reduceSinkValueExpressionsList.toArray(new VectorExpression[0]);
    }
    vectorReduceSinkInfo.setReduceSinkKeyColumnMap(reduceSinkKeyColumnMap);
    vectorReduceSinkInfo.setReduceSinkKeyTypeInfos(reduceSinkKeyTypeInfos);
    vectorReduceSinkInfo.setReduceSinkKeyColumnVectorTypes(reduceSinkKeyColumnVectorTypes);
    vectorReduceSinkInfo.setReduceSinkKeyExpressions(reduceSinkKeyExpressions);
    vectorReduceSinkInfo.setReduceSinkValueColumnMap(reduceSinkValueColumnMap);
    vectorReduceSinkInfo.setReduceSinkValueTypeInfos(reduceSinkValueTypeInfos);
    vectorReduceSinkInfo.setReduceSinkValueColumnVectorTypes(reduceSinkValueColumnVectorTypes);
    vectorReduceSinkInfo.setReduceSinkValueExpressions(reduceSinkValueExpressions);
    return true;
}
Also used: ArrayList (java.util.ArrayList) VectorReduceSinkDesc (org.apache.hadoop.hive.ql.plan.VectorReduceSinkDesc) LazyBinarySerDe (org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe) UDFToString (org.apache.hadoop.hive.ql.udf.UDFToString) StructTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo) PrimitiveTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo) TypeInfo (org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) InConstantType (org.apache.hadoop.hive.ql.exec.vector.VectorizationContext.InConstantType) HashTableImplementationType (org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.HashTableImplementationType) HashTableKeyType (org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.HashTableKeyType) Type (org.apache.hadoop.hive.ql.exec.vector.ColumnVector.Type) VectorDeserializeType (org.apache.hadoop.hive.ql.plan.VectorPartitionDesc.VectorDeserializeType) OperatorType (org.apache.hadoop.hive.ql.plan.api.OperatorType) VectorExpression (org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression) TableDesc (org.apache.hadoop.hive.ql.plan.TableDesc) ExprNodeDesc (org.apache.hadoop.hive.ql.plan.ExprNodeDesc)
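
A detail worth pulling out of the key and value loops above: expressions that are pure column references (IdentityExpression) need no per-batch evaluation, so the method stores null when nothing computing remains. A hedged standalone version of that pruning step (class and method names are ours):

import java.util.ArrayList;
import java.util.List;
import org.apache.hadoop.hive.ql.exec.vector.expressions.IdentityExpression;
import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression;

public class ExpressionPruning {

    // Keeps only the expressions that actually compute something. Pure column
    // references pass their input column straight through, so evaluating them
    // per batch would be wasted work; returning null signals "no work at all".
    static VectorExpression[] dropColumnOnly(VectorExpression[] all) {
        List<VectorExpression> computing = new ArrayList<>();
        for (VectorExpression ve : all) {
            if (!IdentityExpression.isColumnOnly(ve)) {
                computing.add(ve);
            }
        }
        return computing.isEmpty() ? null : computing.toArray(new VectorExpression[0]);
    }
}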

Example 49 with VectorExpression

Use of org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression in project hive by apache.

From class Vectorizer, method vectorizeFilterOperator.

public static Operator<? extends OperatorDesc> vectorizeFilterOperator(Operator<? extends OperatorDesc> filterOp, VectorizationContext vContext) throws HiveException {
    FilterDesc filterDesc = (FilterDesc) filterOp.getConf();
    VectorFilterDesc vectorFilterDesc = new VectorFilterDesc();
    filterDesc.setVectorDesc(vectorFilterDesc);
    ExprNodeDesc predicateExpr = filterDesc.getPredicate();
    VectorExpression vectorPredicateExpr = vContext.getVectorExpression(predicateExpr, VectorExpressionDescriptor.Mode.FILTER);
    vectorFilterDesc.setPredicateExpression(vectorPredicateExpr);
    return OperatorFactory.getVectorOperator(filterOp.getCompilationOpContext(), filterDesc, vContext);
}
Also used: VectorFilterDesc (org.apache.hadoop.hive.ql.plan.VectorFilterDesc) FilterDesc (org.apache.hadoop.hive.ql.plan.FilterDesc) VectorExpression (org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression) ExprNodeDesc (org.apache.hadoop.hive.ql.plan.ExprNodeDesc)
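
The interesting parameter here is the compilation mode: in FILTER mode the resulting expression narrows batch.selected in place rather than materializing a boolean output column (PROJECTION, the other Mode constant, does the latter). A minimal hedged wrapper around that call, assuming Hive's jars on the classpath and with our own class and method names:

import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor;
import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext;
import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;

public class PredicateCompiler {

    // Compiles a predicate in FILTER mode: the resulting VectorExpression
    // prunes batch.selected down to the matching rows instead of writing a
    // boolean column, which is why the filter needs no output column mapping.
    static VectorExpression compileFilter(VectorizationContext vContext, ExprNodeDesc predicate)
            throws HiveException {
        return vContext.getVectorExpression(predicate, VectorExpressionDescriptor.Mode.FILTER);
    }
}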

Example 50 with VectorExpression

Use of org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression in project hive by apache.

From class Vectorizer, method vectorizeSelectOperator.

public static Operator<? extends OperatorDesc> vectorizeSelectOperator(Operator<? extends OperatorDesc> selectOp, VectorizationContext vContext) throws HiveException {
    SelectDesc selectDesc = (SelectDesc) selectOp.getConf();
    VectorSelectDesc vectorSelectDesc = new VectorSelectDesc();
    selectDesc.setVectorDesc(vectorSelectDesc);
    List<ExprNodeDesc> colList = selectDesc.getColList();
    int index = 0;
    final int size = colList.size();
    VectorExpression[] vectorSelectExprs = new VectorExpression[size];
    int[] projectedOutputColumns = new int[size];
    for (int i = 0; i < size; i++) {
        ExprNodeDesc expr = colList.get(i);
        VectorExpression ve = vContext.getVectorExpression(expr);
        projectedOutputColumns[i] = ve.getOutputColumn();
        if (ve instanceof IdentityExpression) {
            // Suppress useless evaluation.
            continue;
        }
        vectorSelectExprs[index++] = ve;
    }
    if (index < size) {
        vectorSelectExprs = Arrays.copyOf(vectorSelectExprs, index);
    }
    vectorSelectDesc.setSelectExpressions(vectorSelectExprs);
    vectorSelectDesc.setProjectedOutputColumns(projectedOutputColumns);
    return OperatorFactory.getVectorOperator(selectOp.getCompilationOpContext(), selectDesc, vContext);
}
Also used: VectorSelectDesc (org.apache.hadoop.hive.ql.plan.VectorSelectDesc) VectorExpression (org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression) SelectDesc (org.apache.hadoop.hive.ql.plan.SelectDesc) ExprNodeDesc (org.apache.hadoop.hive.ql.plan.ExprNodeDesc) IdentityExpression (org.apache.hadoop.hive.ql.exec.vector.expressions.IdentityExpression)
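
The select path uses a slightly different pruning idiom than Example 48: fill an oversized array while skipping IdentityExpression instances, then trim it once with Arrays.copyOf. A hedged standalone rendering of that idiom (names ours, not Hive's):

import java.util.Arrays;
import org.apache.hadoop.hive.ql.exec.vector.expressions.IdentityExpression;
import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression;

public class SelectExprCompaction {

    // Copies only the expressions that do real work, then trims the array in a
    // single allocation so the per-batch evaluation loop never re-tests for
    // identity. The projected output columns are recorded separately upstream.
    static VectorExpression[] compact(VectorExpression[] all) {
        VectorExpression[] work = new VectorExpression[all.length];
        int index = 0;
        for (VectorExpression ve : all) {
            if (!(ve instanceof IdentityExpression)) {
                work[index++] = ve;
            }
        }
        return index < work.length ? Arrays.copyOf(work, index) : work;
    }
}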

Aggregations

VectorExpression (org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression): 78 uses
ExprNodeDesc (org.apache.hadoop.hive.ql.plan.ExprNodeDesc): 41 uses
ArrayList (java.util.ArrayList): 37 uses
Test (org.junit.Test): 28 uses
ExprNodeColumnDesc (org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc): 26 uses
ExprNodeGenericFuncDesc (org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc): 26 uses
DynamicValueVectorExpression (org.apache.hadoop.hive.ql.exec.vector.expressions.DynamicValueVectorExpression): 24 uses
HiveException (org.apache.hadoop.hive.ql.metadata.HiveException): 21 uses
VectorizedRowBatch (org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch): 17 uses
ExprNodeConstantDesc (org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc): 17 uses
IOException (java.io.IOException): 13 uses
JoinUtil (org.apache.hadoop.hive.ql.exec.JoinUtil): 12 uses
GenericUDFOPGreaterThan (org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPGreaterThan): 10 uses
TypeInfo (org.apache.hadoop.hive.serde2.typeinfo.TypeInfo): 8 uses
List (java.util.List): 7 uses
UDFToString (org.apache.hadoop.hive.ql.udf.UDFToString): 7 uses
VectorMapJoinHashTableResult (org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinHashTableResult): 6 uses
GenericUDF (org.apache.hadoop.hive.ql.udf.generic.GenericUDF): 6 uses
PrimitiveTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo): 6 uses
StructTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo): 6 uses