
Example 1 with Output

Use of org.apache.hadoop.hive.serde2.ByteStream.Output in project hive by apache.

The class Vectorizer, method canSpecializeMapJoin:

private boolean canSpecializeMapJoin(Operator<? extends OperatorDesc> op, MapJoinDesc desc, boolean isTezOrSpark, VectorizationContext vContext, VectorMapJoinInfo vectorMapJoinInfo) throws HiveException {
    Preconditions.checkState(op instanceof MapJoinOperator);
    // Allocate a VectorMapJoinDesc initially with implementation type NONE so EXPLAIN
    // can report this operator was vectorized, but not native, along with the conditions that were checked.
    VectorMapJoinDesc vectorDesc = new VectorMapJoinDesc();
    desc.setVectorDesc(vectorDesc);
    boolean isVectorizationMapJoinNativeEnabled = HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVE_VECTORIZATION_MAPJOIN_NATIVE_ENABLED);
    String engine = HiveConf.getVar(hiveConf, HiveConf.ConfVars.HIVE_EXECUTION_ENGINE);
    boolean oneMapJoinCondition = (desc.getConds().length == 1);
    boolean hasNullSafes = onExpressionHasNullSafes(desc);
    byte posBigTable = (byte) desc.getPosBigTable();
    // Since we want to display all the met and not met conditions in EXPLAIN, we determine all
    // information first....
    List<ExprNodeDesc> keyDesc = desc.getKeys().get(posBigTable);
    VectorExpression[] allBigTableKeyExpressions = vContext.getVectorExpressions(keyDesc);
    final int allBigTableKeyExpressionsLength = allBigTableKeyExpressions.length;
    // Assume the key types are supported until we find one that is not.
    boolean supportsKeyTypes = true;
    HashSet<String> notSupportedKeyTypes = new HashSet<String>();
    // Since a key expression can be a calculation and the key will go into a scratch column,
    // we need the mapping and type information.
    int[] bigTableKeyColumnMap = new int[allBigTableKeyExpressionsLength];
    String[] bigTableKeyColumnNames = new String[allBigTableKeyExpressionsLength];
    TypeInfo[] bigTableKeyTypeInfos = new TypeInfo[allBigTableKeyExpressionsLength];
    ArrayList<VectorExpression> bigTableKeyExpressionsList = new ArrayList<VectorExpression>();
    VectorExpression[] bigTableKeyExpressions;
    for (int i = 0; i < allBigTableKeyExpressionsLength; i++) {
        VectorExpression ve = allBigTableKeyExpressions[i];
        if (!IdentityExpression.isColumnOnly(ve)) {
            bigTableKeyExpressionsList.add(ve);
        }
        bigTableKeyColumnMap[i] = ve.getOutputColumn();
        ExprNodeDesc exprNode = keyDesc.get(i);
        bigTableKeyColumnNames[i] = exprNode.toString();
        TypeInfo typeInfo = exprNode.getTypeInfo();
        // Same check as used in HashTableLoader.
        if (!MapJoinKey.isSupportedField(typeInfo)) {
            supportsKeyTypes = false;
            Category category = typeInfo.getCategory();
            notSupportedKeyTypes.add((category != Category.PRIMITIVE ? category.toString() : ((PrimitiveTypeInfo) typeInfo).getPrimitiveCategory().toString()));
        }
        bigTableKeyTypeInfos[i] = typeInfo;
    }
    if (bigTableKeyExpressionsList.size() == 0) {
        bigTableKeyExpressions = null;
    } else {
        bigTableKeyExpressions = bigTableKeyExpressionsList.toArray(new VectorExpression[0]);
    }
    List<ExprNodeDesc> bigTableExprs = desc.getExprs().get(posBigTable);
    VectorExpression[] allBigTableValueExpressions = vContext.getVectorExpressions(bigTableExprs);
    boolean isFastHashTableEnabled = HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVE_VECTORIZATION_MAPJOIN_NATIVE_FAST_HASHTABLE_ENABLED);
    // Read the hybrid hash join flag from the MapJoinDesc now, especially since LLAP is prone
    // to turn it off in later physical optimizer stages...
    boolean isHybridHashJoin = desc.isHybridHashJoin();
    /*
     * Populate vectorMapJoinInfo.
     */
    /*
     * Similarly, we need a mapping since a value expression can be a calculation and the value
     * will go into a scratch column.
     */
    int[] bigTableValueColumnMap = new int[allBigTableValueExpressions.length];
    String[] bigTableValueColumnNames = new String[allBigTableValueExpressions.length];
    TypeInfo[] bigTableValueTypeInfos = new TypeInfo[allBigTableValueExpressions.length];
    ArrayList<VectorExpression> bigTableValueExpressionsList = new ArrayList<VectorExpression>();
    VectorExpression[] bigTableValueExpressions;
    for (int i = 0; i < bigTableValueColumnMap.length; i++) {
        VectorExpression ve = allBigTableValueExpressions[i];
        if (!IdentityExpression.isColumnOnly(ve)) {
            bigTableValueExpressionsList.add(ve);
        }
        bigTableValueColumnMap[i] = ve.getOutputColumn();
        ExprNodeDesc exprNode = bigTableExprs.get(i);
        bigTableValueColumnNames[i] = exprNode.toString();
        bigTableValueTypeInfos[i] = exprNode.getTypeInfo();
    }
    if (bigTableValueExpressionsList.size() == 0) {
        bigTableValueExpressions = null;
    } else {
        bigTableValueExpressions = bigTableValueExpressionsList.toArray(new VectorExpression[0]);
    }
    vectorMapJoinInfo.setBigTableKeyColumnMap(bigTableKeyColumnMap);
    vectorMapJoinInfo.setBigTableKeyColumnNames(bigTableKeyColumnNames);
    vectorMapJoinInfo.setBigTableKeyTypeInfos(bigTableKeyTypeInfos);
    vectorMapJoinInfo.setBigTableKeyExpressions(bigTableKeyExpressions);
    vectorMapJoinInfo.setBigTableValueColumnMap(bigTableValueColumnMap);
    vectorMapJoinInfo.setBigTableValueColumnNames(bigTableValueColumnNames);
    vectorMapJoinInfo.setBigTableValueTypeInfos(bigTableValueTypeInfos);
    vectorMapJoinInfo.setBigTableValueExpressions(bigTableValueExpressions);
    /*
     * Small table information.
     */
    VectorColumnOutputMapping bigTableRetainedMapping = new VectorColumnOutputMapping("Big Table Retained Mapping");
    VectorColumnOutputMapping bigTableOuterKeyMapping = new VectorColumnOutputMapping("Big Table Outer Key Mapping");
    // The order of the fields in the LazyBinary small table value must be used, so
    // we use the source ordering flavor for the mapping.
    VectorColumnSourceMapping smallTableMapping = new VectorColumnSourceMapping("Small Table Mapping");
    Byte[] order = desc.getTagOrder();
    Byte posSingleVectorMapJoinSmallTable = (order[0] == posBigTable ? order[1] : order[0]);
    boolean isOuterJoin = !desc.getNoOuterJoin();
    /*
     * Gather up big and small table output result information from the MapJoinDesc.
     */
    List<Integer> bigTableRetainList = desc.getRetainList().get(posBigTable);
    int bigTableRetainSize = bigTableRetainList.size();
    int[] smallTableIndices;
    int smallTableIndicesSize;
    List<ExprNodeDesc> smallTableExprs = desc.getExprs().get(posSingleVectorMapJoinSmallTable);
    if (desc.getValueIndices() != null && desc.getValueIndices().get(posSingleVectorMapJoinSmallTable) != null) {
        smallTableIndices = desc.getValueIndices().get(posSingleVectorMapJoinSmallTable);
        smallTableIndicesSize = smallTableIndices.length;
    } else {
        smallTableIndices = null;
        smallTableIndicesSize = 0;
    }
    List<Integer> smallTableRetainList = desc.getRetainList().get(posSingleVectorMapJoinSmallTable);
    int smallTableRetainSize = smallTableRetainList.size();
    int smallTableResultSize = 0;
    if (smallTableIndicesSize > 0) {
        smallTableResultSize = smallTableIndicesSize;
    } else if (smallTableRetainSize > 0) {
        smallTableResultSize = smallTableRetainSize;
    }
    /*
     * Determine the big table retained mapping first so we can optimize out (with
     * projection) copying inner join big table keys in the subsequent small table results section.
     */
    // We use a mapping object here so we can build the projection in any order and
    // get the ordered by 0 to n-1 output columns at the end.
    //
    // Also, to avoid copying a big table key into the small table result area for inner joins,
    // we reference it with the projection so there can be duplicate output columns
    // in the projection.
    VectorColumnSourceMapping projectionMapping = new VectorColumnSourceMapping("Projection Mapping");
    int nextOutputColumn = (order[0] == posBigTable ? 0 : smallTableResultSize);
    for (int i = 0; i < bigTableRetainSize; i++) {
        // Since bigTableValueExpressions may do a calculation and produce a scratch column, we
        // need to map to the right batch column.
        int retainColumn = bigTableRetainList.get(i);
        int batchColumnIndex = bigTableValueColumnMap[retainColumn];
        TypeInfo typeInfo = bigTableValueTypeInfos[i];
        // With this map we project the big table batch to make it look like an output batch.
        projectionMapping.add(nextOutputColumn, batchColumnIndex, typeInfo);
        // Collect columns we copy from the big table batch to the overflow batch.
        if (!bigTableRetainedMapping.containsOutputColumn(batchColumnIndex)) {
            // Tolerate repeated use of a big table column.
            bigTableRetainedMapping.add(batchColumnIndex, batchColumnIndex, typeInfo);
        }
        nextOutputColumn++;
    }
    /*
     * Now determine the small table results.
     */
    boolean smallTableExprVectorizes = true;
    int firstSmallTableOutputColumn;
    firstSmallTableOutputColumn = (order[0] == posBigTable ? bigTableRetainSize : 0);
    int smallTableOutputCount = 0;
    nextOutputColumn = firstSmallTableOutputColumn;
    // Small table indices have more information (i.e. keys) than the retain list, so use them if they exist...
    String[] bigTableRetainedNames;
    if (smallTableIndicesSize > 0) {
        smallTableOutputCount = smallTableIndicesSize;
        bigTableRetainedNames = new String[smallTableOutputCount];
        for (int i = 0; i < smallTableIndicesSize; i++) {
            if (smallTableIndices[i] >= 0) {
                // Zero and above numbers indicate a big table key is needed for
                // small table result "area".
                int keyIndex = smallTableIndices[i];
                // Since bigTableKeyExpressions may do a calculation and produce a scratch column, we
                // need to map the right column.
                int batchKeyColumn = bigTableKeyColumnMap[keyIndex];
                bigTableRetainedNames[i] = bigTableKeyColumnNames[keyIndex];
                TypeInfo typeInfo = bigTableKeyTypeInfos[keyIndex];
                if (!isOuterJoin) {
                    // Optimize inner join keys of small table results.
                    // Project the big table key into the small table result "area".
                    projectionMapping.add(nextOutputColumn, batchKeyColumn, typeInfo);
                    if (!bigTableRetainedMapping.containsOutputColumn(batchKeyColumn)) {
                        // If necessary, copy the big table key into the overflow batch's small table
                        // result "area".
                        bigTableRetainedMapping.add(batchKeyColumn, batchKeyColumn, typeInfo);
                    }
                } else {
                    // For outer joins, since the small table key can be null when there is no match,
                    // we must have a physical (scratch) column for those keys.  We cannot use the
                    // projection optimization used by inner joins above.
                    int scratchColumn = vContext.allocateScratchColumn(typeInfo);
                    projectionMapping.add(nextOutputColumn, scratchColumn, typeInfo);
                    bigTableRetainedMapping.add(batchKeyColumn, scratchColumn, typeInfo);
                    bigTableOuterKeyMapping.add(batchKeyColumn, scratchColumn, typeInfo);
                }
            } else {
                // Negative numbers indicate a column to be (deserialize) read from the small table's
                // LazyBinary value row.
                int smallTableValueIndex = -smallTableIndices[i] - 1;
                ExprNodeDesc smallTableExprNode = smallTableExprs.get(i);
                if (!validateExprNodeDesc(smallTableExprNode, "Small Table")) {
                    clearNotVectorizedReason();
                    smallTableExprVectorizes = false;
                }
                bigTableRetainedNames[i] = smallTableExprNode.toString();
                TypeInfo typeInfo = smallTableExprNode.getTypeInfo();
                // Make a new big table scratch column for the small table value.
                int scratchColumn = vContext.allocateScratchColumn(typeInfo);
                projectionMapping.add(nextOutputColumn, scratchColumn, typeInfo);
                smallTableMapping.add(smallTableValueIndex, scratchColumn, typeInfo);
            }
            nextOutputColumn++;
        }
    } else if (smallTableRetainSize > 0) {
        smallTableOutputCount = smallTableRetainSize;
        bigTableRetainedNames = new String[smallTableOutputCount];
        for (int i = 0; i < smallTableRetainSize; i++) {
            int smallTableValueIndex = smallTableRetainList.get(i);
            ExprNodeDesc smallTableExprNode = smallTableExprs.get(i);
            if (!validateExprNodeDesc(smallTableExprNode, "Small Table")) {
                clearNotVectorizedReason();
                smallTableExprVectorizes = false;
            }
            bigTableRetainedNames[i] = smallTableExprNode.toString();
            // Make a new big table scratch column for the small table value.
            TypeInfo typeInfo = smallTableExprNode.getTypeInfo();
            int scratchColumn = vContext.allocateScratchColumn(typeInfo);
            projectionMapping.add(nextOutputColumn, scratchColumn, typeInfo);
            smallTableMapping.add(smallTableValueIndex, scratchColumn, typeInfo);
            nextOutputColumn++;
        }
    } else {
        bigTableRetainedNames = new String[0];
    }
    boolean useOptimizedTable = HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVEMAPJOINUSEOPTIMIZEDTABLE);
    // Remember the condition variables for EXPLAIN regardless of whether we specialize or not.
    vectorDesc.setUseOptimizedTable(useOptimizedTable);
    vectorDesc.setIsVectorizationMapJoinNativeEnabled(isVectorizationMapJoinNativeEnabled);
    vectorDesc.setEngine(engine);
    vectorDesc.setOneMapJoinCondition(oneMapJoinCondition);
    vectorDesc.setHasNullSafes(hasNullSafes);
    vectorDesc.setSmallTableExprVectorizes(smallTableExprVectorizes);
    vectorDesc.setIsFastHashTableEnabled(isFastHashTableEnabled);
    vectorDesc.setIsHybridHashJoin(isHybridHashJoin);
    vectorDesc.setSupportsKeyTypes(supportsKeyTypes);
    if (!supportsKeyTypes) {
        vectorDesc.setNotSupportedKeyTypes(new ArrayList<String>(notSupportedKeyTypes));
    }
    // Check common conditions for both Optimized and Fast Hash Tables.
    // Assume we can specialize until one of the conditions below fails.
    boolean result = true;
    if (!useOptimizedTable || !isVectorizationMapJoinNativeEnabled || !isTezOrSpark || !oneMapJoinCondition || hasNullSafes || !smallTableExprVectorizes) {
        result = false;
    }
    if (!isFastHashTableEnabled) {
        // Check optimized-only hash table restrictions.
        if (!supportsKeyTypes) {
            result = false;
        }
    } else {
        if (isHybridHashJoin) {
            result = false;
        }
    }
    // Convert dynamic arrays and maps to simple arrays.
    bigTableRetainedMapping.finalize();
    bigTableOuterKeyMapping.finalize();
    smallTableMapping.finalize();
    vectorMapJoinInfo.setBigTableRetainedMapping(bigTableRetainedMapping);
    vectorMapJoinInfo.setBigTableOuterKeyMapping(bigTableOuterKeyMapping);
    vectorMapJoinInfo.setSmallTableMapping(smallTableMapping);
    projectionMapping.finalize();
    // Verify we added an entry for each output.
    assert projectionMapping.isSourceSequenceGood();
    vectorMapJoinInfo.setProjectionMapping(projectionMapping);
    return result;
}
Also used: VectorMapJoinDesc(org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc), PrimitiveCategory(org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory), Category(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category), ArrayList(java.util.ArrayList), VectorColumnOutputMapping(org.apache.hadoop.hive.ql.exec.vector.VectorColumnOutputMapping), UDFToString(org.apache.hadoop.hive.ql.udf.UDFToString), PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo), VectorColumnSourceMapping(org.apache.hadoop.hive.ql.exec.vector.VectorColumnSourceMapping), ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc), HashSet(java.util.HashSet), VectorMapJoinOperator(org.apache.hadoop.hive.ql.exec.vector.VectorMapJoinOperator), StructTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo), TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo), UDFToInteger(org.apache.hadoop.hive.ql.udf.UDFToInteger), UDFToByte(org.apache.hadoop.hive.ql.udf.UDFToByte), VectorExpression(org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression)
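
The method above computes a set of condition flags (engine, key types, hash table settings, and so on) and only decides at the very end whether the map join can be specialized to a native vectorized implementation. The final decision reduces to a small predicate; the following is a condensed, hypothetical restatement of those checks for readability, not Hive code.

// Condensed sketch of the specialization decision at the end of canSpecializeMapJoin.
// The parameters mirror the condition flags computed above; this is illustrative only.
static boolean canSpecialize(boolean useOptimizedTable, boolean isVectorizationMapJoinNativeEnabled,
        boolean isTezOrSpark, boolean oneMapJoinCondition, boolean hasNullSafes,
        boolean smallTableExprVectorizes, boolean isFastHashTableEnabled,
        boolean supportsKeyTypes, boolean isHybridHashJoin) {
    // Common conditions for both the Optimized and Fast hash tables.
    if (!useOptimizedTable || !isVectorizationMapJoinNativeEnabled || !isTezOrSpark
            || !oneMapJoinCondition || hasNullSafes || !smallTableExprVectorizes) {
        return false;
    }
    if (!isFastHashTableEnabled) {
        // Optimized hash table only: every key type must be supported by MapJoinKey.
        return supportsKeyTypes;
    }
    // Fast hash table only: hybrid grace hash join is not supported natively.
    return !isHybridHashJoin;
}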

Example 2 with Output

Use of org.apache.hadoop.hive.serde2.ByteStream.Output in project hive by apache.

The class VectorUDFAdaptor, method setOutputCol:

private void setOutputCol(ColumnVector colVec, int i, Object value) {
    /* Depending on the output type, get the value, cast the result to the
     * correct type if needed, and assign the result into the output vector.
     */
    if (outputOI instanceof WritableStringObjectInspector) {
        BytesColumnVector bv = (BytesColumnVector) colVec;
        Text t;
        if (value instanceof String) {
            t = new Text((String) value);
        } else {
            t = ((WritableStringObjectInspector) outputOI).getPrimitiveWritableObject(value);
        }
        bv.setVal(i, t.getBytes(), 0, t.getLength());
    } else if (outputOI instanceof WritableHiveCharObjectInspector) {
        WritableHiveCharObjectInspector writableHiveCharObjectOI = (WritableHiveCharObjectInspector) outputOI;
        int maxLength = ((CharTypeInfo) writableHiveCharObjectOI.getTypeInfo()).getLength();
        BytesColumnVector bv = (BytesColumnVector) colVec;
        HiveCharWritable hiveCharWritable;
        if (value instanceof HiveCharWritable) {
            hiveCharWritable = ((HiveCharWritable) value);
        } else {
            hiveCharWritable = writableHiveCharObjectOI.getPrimitiveWritableObject(value);
        }
        Text t = hiveCharWritable.getTextValue();
        // In vector mode, CHAR values are stored unpadded.
        StringExpr.rightTrimAndTruncate(bv, i, t.getBytes(), 0, t.getLength(), maxLength);
    } else if (outputOI instanceof WritableHiveVarcharObjectInspector) {
        WritableHiveVarcharObjectInspector writableHiveVarcharObjectOI = (WritableHiveVarcharObjectInspector) outputOI;
        int maxLength = ((VarcharTypeInfo) writableHiveVarcharObjectOI.getTypeInfo()).getLength();
        BytesColumnVector bv = (BytesColumnVector) colVec;
        HiveVarcharWritable hiveVarcharWritable;
        if (value instanceof HiveVarcharWritable) {
            hiveVarcharWritable = ((HiveVarcharWritable) value);
        } else {
            hiveVarcharWritable = writableHiveVarcharObjectOI.getPrimitiveWritableObject(value);
        }
        Text t = hiveVarcharWritable.getTextValue();
        StringExpr.truncate(bv, i, t.getBytes(), 0, t.getLength(), maxLength);
    } else if (outputOI instanceof WritableIntObjectInspector) {
        LongColumnVector lv = (LongColumnVector) colVec;
        if (value instanceof Integer) {
            lv.vector[i] = (Integer) value;
        } else {
            lv.vector[i] = ((WritableIntObjectInspector) outputOI).get(value);
        }
    } else if (outputOI instanceof WritableLongObjectInspector) {
        LongColumnVector lv = (LongColumnVector) colVec;
        if (value instanceof Long) {
            lv.vector[i] = (Long) value;
        } else {
            lv.vector[i] = ((WritableLongObjectInspector) outputOI).get(value);
        }
    } else if (outputOI instanceof WritableDoubleObjectInspector) {
        DoubleColumnVector dv = (DoubleColumnVector) colVec;
        if (value instanceof Double) {
            dv.vector[i] = (Double) value;
        } else {
            dv.vector[i] = ((WritableDoubleObjectInspector) outputOI).get(value);
        }
    } else if (outputOI instanceof WritableFloatObjectInspector) {
        DoubleColumnVector dv = (DoubleColumnVector) colVec;
        if (value instanceof Float) {
            dv.vector[i] = (Float) value;
        } else {
            dv.vector[i] = ((WritableFloatObjectInspector) outputOI).get(value);
        }
    } else if (outputOI instanceof WritableShortObjectInspector) {
        LongColumnVector lv = (LongColumnVector) colVec;
        if (value instanceof Short) {
            lv.vector[i] = (Short) value;
        } else {
            lv.vector[i] = ((WritableShortObjectInspector) outputOI).get(value);
        }
    } else if (outputOI instanceof WritableByteObjectInspector) {
        LongColumnVector lv = (LongColumnVector) colVec;
        if (value instanceof Byte) {
            lv.vector[i] = (Byte) value;
        } else {
            lv.vector[i] = ((WritableByteObjectInspector) outputOI).get(value);
        }
    } else if (outputOI instanceof WritableTimestampObjectInspector) {
        TimestampColumnVector tv = (TimestampColumnVector) colVec;
        Timestamp ts;
        if (value instanceof Timestamp) {
            ts = (Timestamp) value;
        } else {
            ts = ((WritableTimestampObjectInspector) outputOI).getPrimitiveJavaObject(value);
        }
        tv.set(i, ts);
    } else if (outputOI instanceof WritableDateObjectInspector) {
        LongColumnVector lv = (LongColumnVector) colVec;
        Date ts;
        if (value instanceof Date) {
            ts = (Date) value;
        } else {
            ts = ((WritableDateObjectInspector) outputOI).getPrimitiveJavaObject(value);
        }
        long l = DateWritable.dateToDays(ts);
        lv.vector[i] = l;
    } else if (outputOI instanceof WritableBooleanObjectInspector) {
        LongColumnVector lv = (LongColumnVector) colVec;
        if (value instanceof Boolean) {
            lv.vector[i] = (Boolean) value ? 1 : 0;
        } else {
            lv.vector[i] = ((WritableBooleanObjectInspector) outputOI).get(value) ? 1 : 0;
        }
    } else if (outputOI instanceof WritableHiveDecimalObjectInspector) {
        DecimalColumnVector dcv = (DecimalColumnVector) colVec;
        if (value instanceof HiveDecimal) {
            dcv.set(i, (HiveDecimal) value);
        } else {
            HiveDecimal hd = ((WritableHiveDecimalObjectInspector) outputOI).getPrimitiveJavaObject(value);
            dcv.set(i, hd);
        }
    } else if (outputOI instanceof WritableBinaryObjectInspector) {
        BytesWritable bw = (BytesWritable) value;
        BytesColumnVector bv = (BytesColumnVector) colVec;
        bv.setVal(i, bw.getBytes(), 0, bw.getLength());
    } else {
        throw new RuntimeException("Unhandled object type " + outputOI.getTypeName() + " inspector class " + outputOI.getClass().getName() + " value class " + value.getClass().getName());
    }
}
Also used: VarcharTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo), Timestamp(java.sql.Timestamp), HiveDecimal(org.apache.hadoop.hive.common.type.HiveDecimal), HiveCharWritable(org.apache.hadoop.hive.serde2.io.HiveCharWritable), HiveVarcharWritable(org.apache.hadoop.hive.serde2.io.HiveVarcharWritable), Text(org.apache.hadoop.io.Text), BytesWritable(org.apache.hadoop.io.BytesWritable), Date(java.sql.Date), WritableBinaryObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableBinaryObjectInspector)
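
setOutputCol writes one row-mode UDF result into row i of the output ColumnVector, dispatching on the output ObjectInspector. For context, the sketch below shows the kind of per-row loop that would drive it from a VectorizedRowBatch; it assumes the row-mode results have already been computed into rowResults and is illustrative only, not the actual VectorUDFAdaptor.evaluate() implementation (which also handles nulls and scratch-column setup).

// Illustrative driver loop (hypothetical): feed precomputed per-row results into setOutputCol,
// honoring the batch's selected-rows vector when the batch is filtered.
private void writeResults(VectorizedRowBatch batch, int outputColumnNum, Object[] rowResults) {
    ColumnVector outputCol = batch.cols[outputColumnNum];
    for (int j = 0; j < batch.size; j++) {
        int i = batch.selectedInUse ? batch.selected[j] : j;
        setOutputCol(outputCol, i, rowResults[i]);
    }
}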

Example 3 with Output

Use of org.apache.hadoop.hive.serde2.ByteStream.Output in project hive by apache.

The class TaskCompiler, method genColumnStatsTask:

/**
   * A helper function to generate a column stats task on top of a map-red task. The column stats
   * task fetches from the output of the map-red task, constructs the column stats object and
   * persists it to the metastore.
   *
   * This method generates a plan with a column stats task on top of the map-red task and sets up
   * the appropriate metadata to be used during execution.
   *
   * @param analyzeRewrite
   * @param loadFileWork
   * @param leafTasks
   * @param outerQueryLimit
   * @param numBitVector
   */
@SuppressWarnings("unchecked")
protected void genColumnStatsTask(AnalyzeRewriteContext analyzeRewrite, List<LoadFileDesc> loadFileWork, Set<Task<? extends Serializable>> leafTasks, int outerQueryLimit, int numBitVector) {
    ColumnStatsTask cStatsTask = null;
    ColumnStatsWork cStatsWork = null;
    FetchWork fetch = null;
    String tableName = analyzeRewrite.getTableName();
    List<String> colName = analyzeRewrite.getColName();
    List<String> colType = analyzeRewrite.getColType();
    boolean isTblLevel = analyzeRewrite.isTblLvl();
    String cols = loadFileWork.get(0).getColumns();
    String colTypes = loadFileWork.get(0).getColumnTypes();
    String resFileFormat;
    TableDesc resultTab;
    if (SessionState.get().isHiveServerQuery() && conf.getBoolVar(HiveConf.ConfVars.HIVE_SERVER2_THRIFT_RESULTSET_SERIALIZE_IN_TASKS)) {
        resFileFormat = "SequenceFile";
        resultTab = PlanUtils.getDefaultQueryOutputTableDesc(cols, colTypes, resFileFormat, ThriftJDBCBinarySerDe.class);
    } else {
        resFileFormat = HiveConf.getVar(conf, HiveConf.ConfVars.HIVEQUERYRESULTFILEFORMAT);
        resultTab = PlanUtils.getDefaultQueryOutputTableDesc(cols, colTypes, resFileFormat, LazySimpleSerDe.class);
    }
    fetch = new FetchWork(loadFileWork.get(0).getSourcePath(), resultTab, outerQueryLimit);
    ColumnStatsDesc cStatsDesc = new ColumnStatsDesc(tableName, colName, colType, isTblLevel, numBitVector);
    cStatsWork = new ColumnStatsWork(fetch, cStatsDesc);
    cStatsTask = (ColumnStatsTask) TaskFactory.get(cStatsWork, conf);
    for (Task<? extends Serializable> tsk : leafTasks) {
        tsk.addDependentTask(cStatsTask);
    }
}
Also used: ColumnStatsDesc(org.apache.hadoop.hive.ql.plan.ColumnStatsDesc), ThriftJDBCBinarySerDe(org.apache.hadoop.hive.serde2.thrift.ThriftJDBCBinarySerDe), LazySimpleSerDe(org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe), FetchWork(org.apache.hadoop.hive.ql.plan.FetchWork), ColumnStatsWork(org.apache.hadoop.hive.ql.plan.ColumnStatsWork), TableDesc(org.apache.hadoop.hive.ql.plan.TableDesc), LoadTableDesc(org.apache.hadoop.hive.ql.plan.LoadTableDesc), CreateTableDesc(org.apache.hadoop.hive.ql.plan.CreateTableDesc), ColumnStatsTask(org.apache.hadoop.hive.ql.exec.ColumnStatsTask)

Example 4 with Output

Use of org.apache.hadoop.hive.serde2.ByteStream.Output in project hive by apache.

The class VectorUDAFAvgDecimal, method initPartialResultInspector:

private void initPartialResultInspector() {
    // the output type of the vectorized partial aggregate must match the
    // expected type for the row-mode aggregation
    // For decimal, the type is "same number of integer digits and 4 more decimal digits"
    DecimalTypeInfo dtiSum = GenericUDAFAverage.deriveSumFieldTypeInfo(inputPrecision, inputScale);
    this.sumScale = (short) dtiSum.scale();
    this.sumPrecision = (short) dtiSum.precision();
    List<ObjectInspector> foi = new ArrayList<ObjectInspector>();
    foi.add(PrimitiveObjectInspectorFactory.writableLongObjectInspector);
    foi.add(PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(dtiSum));
    List<String> fname = new ArrayList<String>();
    fname.add("count");
    fname.add("sum");
    soi = ObjectInspectorFactory.getStandardStructObjectInspector(fname, foi);
}
Also used: DecimalTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo), StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector), ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector), ArrayList(java.util.ArrayList)
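
The struct inspector built above exposes the vectorized partial aggregate as a (count, sum) pair to the row-mode side of the aggregation. As a hedged sketch of how such a partial result could be read back through that inspector: the field names "count" and "sum" come from the code above, while the helper methods themselves are hypothetical and rely only on the standard ObjectInspector APIs (getStructFieldRef / getStructFieldData) and primitive inspectors.

// Hypothetical readers for the (count, sum) partial aggregate struct; illustrative only.
private static long readCount(StructObjectInspector soi, Object partialResult) {
    StructField countField = soi.getStructFieldRef("count");
    LongObjectInspector countOI = (LongObjectInspector) countField.getFieldObjectInspector();
    return countOI.get(soi.getStructFieldData(partialResult, countField));
}

private static HiveDecimal readSum(StructObjectInspector soi, Object partialResult) {
    StructField sumField = soi.getStructFieldRef("sum");
    HiveDecimalObjectInspector sumOI = (HiveDecimalObjectInspector) sumField.getFieldObjectInspector();
    return sumOI.getPrimitiveJavaObject(soi.getStructFieldData(partialResult, sumField));
}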

Example 5 with Output

Use of org.apache.hadoop.hive.serde2.ByteStream.Output in project hive by apache.

The class TestVectorMapJoinRowBytesContainer, method doFillReplay:

public void doFillReplay(Random random, int maxCount) throws Exception {
    RandomByteArrayStream randomByteArrayStream = new RandomByteArrayStream(random);
    VectorMapJoinRowBytesContainer vectorMapJoinRowBytesContainer = new VectorMapJoinRowBytesContainer(null);
    int count = Math.min(maxCount, random.nextInt(500));
    for (int i = 0; i < count; i++) {
        byte[] bytes = randomByteArrayStream.next();
        Output output = vectorMapJoinRowBytesContainer.getOuputForRowBytes();
        output.write(bytes);
        vectorMapJoinRowBytesContainer.finishRow();
    }
    vectorMapJoinRowBytesContainer.prepareForReading();
    for (int i = 0; i < count; i++) {
        if (!vectorMapJoinRowBytesContainer.readNext()) {
            assertTrue(false);
        }
        byte[] readBytes = vectorMapJoinRowBytesContainer.currentBytes();
        int readOffset = vectorMapJoinRowBytesContainer.currentOffset();
        int readLength = vectorMapJoinRowBytesContainer.currentLength();
        byte[] expectedBytes = randomByteArrayStream.get(i);
        if (readLength != expectedBytes.length) {
            assertTrue(false);
        }
        for (int j = 0; j < readLength; j++) {
            byte readByte = readBytes[readOffset + j];
            byte expectedByte = expectedBytes[j];
            if (readByte != expectedByte) {
                assertTrue(false);
            }
        }
    }
}
Also used: VectorMapJoinRowBytesContainer(org.apache.hadoop.hive.ql.exec.vector.mapjoin.VectorMapJoinRowBytesContainer), Output(org.apache.hadoop.hive.serde2.ByteStream.Output)
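
This test is the one place in these examples where org.apache.hadoop.hive.serde2.ByteStream.Output appears directly: each row's bytes are written into the Output obtained from getOuputForRowBytes(). The pattern behind it is a growable, non-synchronized byte buffer that exposes its backing array without copying. A minimal, hypothetical sketch of that pattern, assuming the usual getData()/getLength() accessors and using a BytesWritable as an example consumer:

// Minimal sketch (illustrative): accumulate chunks in a ByteStream.Output, then hand the
// backing array to a consumer. getData() returns the internal buffer, which may be longer
// than the valid length reported by getLength().
public static BytesWritable toBytesWritable(byte[]... chunks) {
    Output output = new Output();
    for (byte[] chunk : chunks) {
        output.write(chunk, 0, chunk.length);
    }
    BytesWritable result = new BytesWritable();
    result.set(output.getData(), 0, output.getLength());
    return result;
}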

Aggregations

ObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector): 77
ArrayList (java.util.ArrayList): 72
HiveException (org.apache.hadoop.hive.ql.metadata.HiveException): 48
TypeInfo (org.apache.hadoop.hive.serde2.typeinfo.TypeInfo): 48
Test (org.junit.Test): 44
DeferredJavaObject (org.apache.hadoop.hive.ql.udf.generic.GenericUDF.DeferredJavaObject): 43
Output (org.apache.hadoop.hive.serde2.ByteStream.Output): 42
DeferredObject (org.apache.hadoop.hive.ql.udf.generic.GenericUDF.DeferredObject): 41
StructObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector): 41
PrimitiveTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo): 37
Text (org.apache.hadoop.io.Text): 35
ExprNodeDesc (org.apache.hadoop.hive.ql.plan.ExprNodeDesc): 32
IOException (java.io.IOException): 29
DateWritableV2 (org.apache.hadoop.hive.serde2.io.DateWritableV2): 24
BytesWritable (org.apache.hadoop.io.BytesWritable): 23
VectorExpression (org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression): 22
HiveDecimalWritable (org.apache.hadoop.hive.serde2.io.HiveDecimalWritable): 22
SerDeException (org.apache.hadoop.hive.serde2.SerDeException): 21
TimestampWritableV2 (org.apache.hadoop.hive.serde2.io.TimestampWritableV2): 21
List (java.util.List): 18