Search in sources:

Example 1 with VectorizedRowBatchCtx

Use of org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatchCtx in project hive by apache.

From the class Vectorizer, method debugDisplayAllMaps:

public void debugDisplayAllMaps(BaseWork work) {
    VectorizedRowBatchCtx vectorizedRowBatchCtx = work.getVectorizedRowBatchCtx();
    String[] allColumnNames = vectorizedRowBatchCtx.getRowColumnNames();
    TypeInfo[] columnTypeInfos = vectorizedRowBatchCtx.getRowColumnTypeInfos();
    int partitionColumnCount = vectorizedRowBatchCtx.getPartitionColumnCount();
    String[] scratchColumnTypeNames = vectorizedRowBatchCtx.getScratchColumnTypeNames();
    LOG.debug("debugDisplayAllMaps allColumnNames " + Arrays.toString(allColumnNames));
    LOG.debug("debugDisplayAllMaps columnTypeInfos " + Arrays.deepToString((Object[]) columnTypeInfos));
    LOG.debug("debugDisplayAllMaps partitionColumnCount " + partitionColumnCount);
    LOG.debug("debugDisplayAllMaps scratchColumnTypeNames " + Arrays.toString(scratchColumnTypeNames));
}
Also used: VectorizedRowBatchCtx(org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatchCtx) UDFToString(org.apache.hadoop.hive.ql.udf.UDFToString)
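
For orientation, here is a minimal standalone sketch (not taken from the Hive sources on this page) that builds a VectorizedRowBatchCtx from a StructObjectInspector and reads back the same metadata that debugDisplayAllMaps logs. The two-column schema and class name are invented for illustration; the VectorizedRowBatchCtx calls themselves all appear in the examples on this page.

import java.util.Arrays;
import java.util.List;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatchCtx;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;

public class VrbCtxMetadataSketch {
    public static void main(String[] args) throws Exception {
        // Hypothetical two-column schema: (id bigint, name string).
        List<ObjectInspector> fieldOIs = Arrays.asList(
                PrimitiveObjectInspectorFactory.javaLongObjectInspector,
                PrimitiveObjectInspectorFactory.javaStringObjectInspector);
        StructObjectInspector oi = ObjectInspectorFactory.getStandardStructObjectInspector(
                Arrays.asList("id", "name"), fieldOIs);
        VectorizedRowBatchCtx vrbCtx = new VectorizedRowBatchCtx();
        // Same init(oi, scratchColumnTypeNames) call the examples below use.
        vrbCtx.init(oi, new String[0]);
        // The same accessors debugDisplayAllMaps logs above.
        System.out.println(Arrays.toString(vrbCtx.getRowColumnNames()));
        System.out.println(Arrays.toString(vrbCtx.getRowColumnTypeInfos()));
        System.out.println(vrbCtx.getPartitionColumnCount());
    }
}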

Example 2 with VectorizedRowBatchCtx

Use of org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatchCtx in project hive by apache.

From the class VectorDeserializeOrcWriter, method createVrbCtx:

private static VectorizedRowBatchCtx createVrbCtx(StructObjectInspector oi, final Properties tblProps, final Configuration conf) throws IOException {
    final boolean useDecimal64ColumnVectors = HiveConf.getVar(conf, ConfVars.HIVE_VECTORIZED_INPUT_FORMAT_SUPPORTS_ENABLED).equalsIgnoreCase("decimal_64");
    final String serde = tblProps.getProperty(serdeConstants.SERIALIZATION_LIB);
    final String inputFormat = tblProps.getProperty(hive_metastoreConstants.FILE_INPUT_FORMAT);
    final boolean isTextFormat = inputFormat != null
            && inputFormat.equals(TextInputFormat.class.getName())
            && serde != null
            && serde.equals(LazySimpleSerDe.class.getName());
    List<DataTypePhysicalVariation> dataTypePhysicalVariations = new ArrayList<>();
    if (isTextFormat) {
        StructTypeInfo structTypeInfo = (StructTypeInfo) TypeInfoUtils.getTypeInfoFromObjectInspector(oi);
        int dataColumnCount = structTypeInfo.getAllStructFieldTypeInfos().size();
        for (int i = 0; i < dataColumnCount; i++) {
            DataTypePhysicalVariation dataTypePhysicalVariation = DataTypePhysicalVariation.NONE;
            if (useDecimal64ColumnVectors) {
                TypeInfo typeInfo = structTypeInfo.getAllStructFieldTypeInfos().get(i);
                if (typeInfo instanceof DecimalTypeInfo) {
                    DecimalTypeInfo decimalTypeInfo = (DecimalTypeInfo) typeInfo;
                    if (HiveDecimalWritable.isPrecisionDecimal64(decimalTypeInfo.precision())) {
                        dataTypePhysicalVariation = DataTypePhysicalVariation.DECIMAL_64;
                    }
                }
            }
            dataTypePhysicalVariations.add(dataTypePhysicalVariation);
        }
    }
    VectorizedRowBatchCtx vrbCtx = new VectorizedRowBatchCtx();
    try {
        vrbCtx.init(oi, new String[0]);
    } catch (HiveException e) {
        throw new IOException(e);
    }
    if (!dataTypePhysicalVariations.isEmpty()) {
        vrbCtx.setRowDataTypePhysicalVariations(dataTypePhysicalVariations.toArray(new DataTypePhysicalVariation[0]));
    }
    return vrbCtx;
}
Also used: DecimalTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo) VectorizedRowBatchCtx(org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatchCtx) DataTypePhysicalVariation(org.apache.hadoop.hive.common.type.DataTypePhysicalVariation) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) ArrayList(java.util.ArrayList) StructTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo) IOException(java.io.IOException) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo)
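
The decimal_64 branch in createVrbCtx turns on a single predicate: a decimal column can use the faster long-backed DECIMAL_64 vectors only when its precision fits in 64 bits (18 decimal digits in current Hive). A self-contained illustration of that per-column decision follows; the class and method names are invented, but the Hive calls mirror the snippet above.

import org.apache.hadoop.hive.common.type.DataTypePhysicalVariation;
import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable;
import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;

public class Decimal64Check {
    // Mirrors the per-column decision in createVrbCtx above.
    static DataTypePhysicalVariation variationFor(TypeInfo typeInfo) {
        if (typeInfo instanceof DecimalTypeInfo
                && HiveDecimalWritable.isPrecisionDecimal64(((DecimalTypeInfo) typeInfo).precision())) {
            return DataTypePhysicalVariation.DECIMAL_64;
        }
        return DataTypePhysicalVariation.NONE;
    }

    public static void main(String[] args) {
        // decimal(10,2) fits in a long; decimal(38,10) does not.
        System.out.println(variationFor(TypeInfoFactory.getDecimalTypeInfo(10, 2)));  // DECIMAL_64
        System.out.println(variationFor(TypeInfoFactory.getDecimalTypeInfo(38, 10))); // NONE
    }
}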

Example 3 with VectorizedRowBatchCtx

Use of org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatchCtx in project hive by apache.

From the class KryoBench, method mockMapWork:

public static MapWork mockMapWork(String tableName, int partitions, ObjectInspector objectInspector) throws Exception {
    Path root = new Path("/warehouse", tableName);
    String[] partPath = new String[partitions];
    StringBuilder buffer = new StringBuilder();
    for (int p = 0; p < partitions; ++p) {
        partPath[p] = new Path(root, "p=" + p).toString();
        if (p != 0) {
            buffer.append(',');
        }
        buffer.append(partPath[p]);
    }
    StringBuilder columnIds = new StringBuilder();
    StringBuilder columnNames = new StringBuilder();
    StringBuilder columnTypes = new StringBuilder();
    StructObjectInspector structOI = (StructObjectInspector) objectInspector;
    List<? extends StructField> fields = structOI.getAllStructFieldRefs();
    int numCols = fields.size();
    for (int i = 0; i < numCols; ++i) {
        if (i != 0) {
            columnIds.append(',');
            columnNames.append(',');
            columnTypes.append(',');
        }
        columnIds.append(i);
        columnNames.append(fields.get(i).getFieldName());
        columnTypes.append(fields.get(i).getFieldObjectInspector().getTypeName());
    }
    Properties tblProps = new Properties();
    tblProps.put("name", tableName);
    tblProps.put("serialization.lib", OrcSerde.class.getName());
    tblProps.put("columns", columnNames.toString());
    tblProps.put("columns.types", columnTypes.toString());
    TableDesc tbl = new TableDesc(OrcInputFormat.class, OrcOutputFormat.class, tblProps);
    MapWork mapWork = new MapWork();
    mapWork.setVectorMode(true);
    VectorizedRowBatchCtx vectorizedRowBatchCtx = new VectorizedRowBatchCtx();
    vectorizedRowBatchCtx.init(structOI, new String[0]);
    mapWork.setVectorizedRowBatchCtx(vectorizedRowBatchCtx);
    mapWork.setUseBucketizedHiveInputFormat(false);
    Map<Path, List<String>> aliasMap = new LinkedHashMap<>();
    List<String> aliases = new ArrayList<String>();
    aliases.add(tableName);
    LinkedHashMap<Path, PartitionDesc> partMap = new LinkedHashMap<>();
    for (int p = 0; p < partitions; ++p) {
        Path path = new Path(partPath[p]);
        aliasMap.put(path, aliases);
        LinkedHashMap<String, String> partSpec = new LinkedHashMap<String, String>();
        PartitionDesc part = new PartitionDesc(tbl, partSpec);
        part.setVectorPartitionDesc(VectorPartitionDesc.createVectorizedInputFileFormat("MockInputFileFormatClassName", false, null));
        partMap.put(path, part);
    }
    mapWork.setPathToAliases(aliasMap);
    mapWork.setPathToPartitionInfo(partMap);
    return mapWork;
}
Also used: Path(org.apache.hadoop.fs.Path) ArrayList(java.util.ArrayList) Properties(java.util.Properties) LinkedHashMap(java.util.LinkedHashMap) VectorizedRowBatchCtx(org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatchCtx) MapWork(org.apache.hadoop.hive.ql.plan.MapWork) OrcSerde(org.apache.hadoop.hive.ql.io.orc.OrcSerde) List(java.util.List) PartitionDesc(org.apache.hadoop.hive.ql.plan.PartitionDesc) VectorPartitionDesc(org.apache.hadoop.hive.ql.plan.VectorPartitionDesc) TableDesc(org.apache.hadoop.hive.ql.plan.TableDesc) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector)
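
A plausible harness around mockMapWork, showing what KryoBench presumably measures: Kryo-serializing the generated MapWork. This sketch is not from the page; the single-column schema is invented, and the use of SerializationUtilities.serializePlan is an assumption about the benchmark's body.

import java.io.ByteArrayOutputStream;
import java.util.Arrays;
import java.util.List;
import org.apache.hadoop.hive.ql.exec.SerializationUtilities;
import org.apache.hadoop.hive.ql.plan.MapWork;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;

public class KryoBenchUsageSketch {
    public static void main(String[] args) throws Exception {
        // Hypothetical single-column schema for the mock table.
        List<ObjectInspector> fieldOIs = Arrays.asList(
                (ObjectInspector) PrimitiveObjectInspectorFactory.javaLongObjectInspector);
        ObjectInspector oi = ObjectInspectorFactory.getStandardStructObjectInspector(
                Arrays.asList("id"), fieldOIs);
        MapWork work = KryoBench.mockMapWork("bench_table", 8, oi);
        // Kryo-serialize the plan; assumed to be what the benchmark times.
        ByteArrayOutputStream out = new ByteArrayOutputStream();
        SerializationUtilities.serializePlan(work, out);
        System.out.println("serialized plan bytes: " + out.size());
    }
}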

Example 4 with VectorizedRowBatchCtx

Use of org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatchCtx in project hive by apache.

From the class ColumnarStorageBench, method initialVectorizedRowBatchCtx:

private void initialVectorizedRowBatchCtx(Configuration conf) throws HiveException {
    MapWork mapWork = new MapWork();
    VectorizedRowBatchCtx rbCtx = new VectorizedRowBatchCtx();
    rbCtx.init(createStructObjectInspector(conf), new String[0]);
    mapWork.setVectorMode(true);
    mapWork.setVectorizedRowBatchCtx(rbCtx);
    Utilities.setMapWork(conf, mapWork);
}
Also used: VectorizedRowBatchCtx(org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatchCtx) MapWork(org.apache.hadoop.hive.ql.plan.MapWork)
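
The reason for Utilities.setMapWork is that downstream record readers fetch the plan back from the Configuration. A minimal sketch of that retrieval side, assuming Hive's Utilities.getMapWork counterpart; the helper class is hypothetical, while the VectorizedRowBatchCtx calls appear elsewhere on this page.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.ql.exec.Utilities;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatchCtx;
import org.apache.hadoop.hive.ql.plan.MapWork;

// Hypothetical reader-side helper: pulls back the MapWork stashed by
// initialVectorizedRowBatchCtx and allocates a batch shaped by its ctx.
public class ReaderSideSketch {
    static VectorizedRowBatch batchFor(Configuration conf) {
        MapWork mapWork = Utilities.getMapWork(conf);
        VectorizedRowBatchCtx rbCtx = mapWork.getVectorizedRowBatchCtx();
        return rbCtx.createVectorizedRowBatch();
    }
}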

Example 5 with VectorizedRowBatchCtx

Use of org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatchCtx in project hive by apache.

From the class AggregationBase, method doVectorTest:

protected static boolean doVectorTest(String aggregationName, TypeInfo typeInfo,
        GenericUDAFEvaluator evaluator, TypeInfo outputTypeInfo,
        GenericUDAFEvaluator.Mode udafEvaluatorMode, int maxKeyCount,
        List<String> columns, String[] columnNames, TypeInfo[] typeInfos,
        DataTypePhysicalVariation[] dataTypePhysicalVariations,
        List<ExprNodeDesc> parameterList, VectorRandomBatchSource batchSource,
        Object[] results) throws Exception {
    HiveConf hiveConf = new HiveConf();
    VectorizationContext vectorizationContext = new VectorizationContext("name", columns, Arrays.asList(typeInfos), Arrays.asList(dataTypePhysicalVariations), hiveConf);
    ImmutablePair<VectorAggregationDesc, String> pair = Vectorizer.getVectorAggregationDesc(aggregationName, parameterList, evaluator, outputTypeInfo, udafEvaluatorMode, vectorizationContext);
    VectorAggregationDesc vecAggrDesc = pair.left;
    if (vecAggrDesc == null) {
        Assert.fail("No vector aggregation expression found for aggregationName " + aggregationName + " udafEvaluatorMode " + udafEvaluatorMode + " parameterList " + parameterList + " outputTypeInfo " + outputTypeInfo);
    }
    Class<? extends VectorAggregateExpression> vecAggrClass = vecAggrDesc.getVecAggrClass();
    Constructor<? extends VectorAggregateExpression> ctor = null;
    try {
        ctor = vecAggrClass.getConstructor(VectorAggregationDesc.class);
    } catch (Exception e) {
        throw new HiveException("Constructor " + vecAggrClass.getSimpleName() + "(VectorAggregationDesc) not available");
    }
    VectorAggregateExpression vecAggrExpr = null;
    try {
        vecAggrExpr = ctor.newInstance(vecAggrDesc);
    } catch (Exception e) {
        throw new HiveException("Failed to create " + vecAggrClass.getSimpleName() + "(VectorAggregationDesc) object ", e);
    }
    VectorExpression.doTransientInit(vecAggrExpr.getInputExpression(), hiveConf);
    // Debug output (disabled): when diagnosing a failure, print typeInfo and the
    // chosen vecAggrExpr class name here.
    VectorRandomRowSource rowSource = batchSource.getRowSource();
    VectorizedRowBatchCtx batchContext = new VectorizedRowBatchCtx(
            columnNames,
            rowSource.typeInfos(),
            rowSource.dataTypePhysicalVariations(),
            /* dataColumnNums */ null,
            /* partitionColumnCount */ 0,
            /* virtualColumnCount */ 0,
            /* neededVirtualColumns */ null,
            vectorizationContext.getScratchColumnTypeNames(),
            vectorizationContext.getScratchDataTypePhysicalVariations());
    VectorizedRowBatch batch = batchContext.createVectorizedRowBatch();
    // Last entry is for a NULL key.
    VectorAggregationBufferRow[] vectorAggregationBufferRows = new VectorAggregationBufferRow[maxKeyCount + 1];
    VectorAggregationBufferRow[] batchBufferRows;
    batchSource.resetBatchIteration();
    int rowIndex = 0;
    while (true) {
        if (!batchSource.fillNextBatch(batch)) {
            break;
        }
        LongColumnVector keyLongColVector = (LongColumnVector) batch.cols[0];
        batchBufferRows = new VectorAggregationBufferRow[VectorizedRowBatch.DEFAULT_SIZE];
        final int size = batch.size;
        boolean selectedInUse = batch.selectedInUse;
        int[] selected = batch.selected;
        for (int logical = 0; logical < size; logical++) {
            final int batchIndex = (selectedInUse ? selected[logical] : logical);
            final int keyAdjustedBatchIndex;
            if (keyLongColVector.isRepeating) {
                keyAdjustedBatchIndex = 0;
            } else {
                keyAdjustedBatchIndex = batchIndex;
            }
            final short key;
            if (keyLongColVector.noNulls || !keyLongColVector.isNull[keyAdjustedBatchIndex]) {
                key = (short) keyLongColVector.vector[keyAdjustedBatchIndex];
            } else {
                key = (short) maxKeyCount;
            }
            VectorAggregationBufferRow bufferRow = vectorAggregationBufferRows[key];
            if (bufferRow == null) {
                VectorAggregateExpression.AggregationBuffer aggregationBuffer = vecAggrExpr.getNewAggregationBuffer();
                aggregationBuffer.reset();
                VectorAggregateExpression.AggregationBuffer[] aggregationBuffers = new VectorAggregateExpression.AggregationBuffer[] { aggregationBuffer };
                bufferRow = new VectorAggregationBufferRow(aggregationBuffers);
                vectorAggregationBufferRows[key] = bufferRow;
            }
            batchBufferRows[logical] = bufferRow;
        }
        vecAggrExpr.aggregateInputSelection(batchBufferRows, 0, batch);
        rowIndex += batch.size;
    }
    String[] outputColumnNames = new String[] { "output" };
    TypeInfo[] outputTypeInfos = new TypeInfo[] { outputTypeInfo };
    VectorizedRowBatchCtx outputBatchContext = new VectorizedRowBatchCtx(
            outputColumnNames,
            outputTypeInfos,
            new DataTypePhysicalVariation[] { vecAggrExpr.getOutputDataTypePhysicalVariation() },
            /* dataColumnNums */ null,
            /* partitionColumnCount */ 0,
            /* virtualColumnCount */ 0,
            /* neededVirtualColumns */ null,
            new String[0],
            new DataTypePhysicalVariation[0]);
    VectorizedRowBatch outputBatch = outputBatchContext.createVectorizedRowBatch();
    short[] keys = new short[VectorizedRowBatch.DEFAULT_SIZE];
    VectorExtractRow resultVectorExtractRow = new VectorExtractRow();
    resultVectorExtractRow.init(new TypeInfo[] { outputTypeInfo }, new int[] { 0 });
    Object[] scratchRow = new Object[1];
    for (short key = 0; key < maxKeyCount + 1; key++) {
        VectorAggregationBufferRow vectorAggregationBufferRow = vectorAggregationBufferRows[key];
        if (vectorAggregationBufferRow != null) {
            if (outputBatch.size == VectorizedRowBatch.DEFAULT_SIZE) {
                extractResultObjects(outputBatch, keys, resultVectorExtractRow, outputTypeInfo, scratchRow, results);
                outputBatch.reset();
            }
            keys[outputBatch.size] = key;
            VectorAggregateExpression.AggregationBuffer aggregationBuffer = vectorAggregationBufferRow.getAggregationBuffer(0);
            vecAggrExpr.assignRowColumn(outputBatch, outputBatch.size++, 0, aggregationBuffer);
        }
    }
    if (outputBatch.size > 0) {
        extractResultObjects(outputBatch, keys, resultVectorExtractRow, outputTypeInfo, scratchRow, results);
    }
    return true;
}
Also used: HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) VectorizationContext(org.apache.hadoop.hive.ql.exec.vector.VectorizationContext) VectorizedRowBatch(org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch) HiveConf(org.apache.hadoop.hive.conf.HiveConf) VectorAggregationBufferRow(org.apache.hadoop.hive.ql.exec.vector.VectorAggregationBufferRow) LongColumnVector(org.apache.hadoop.hive.ql.exec.vector.LongColumnVector) VectorAggregateExpression(org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.VectorAggregateExpression) PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) VectorExtractRow(org.apache.hadoop.hive.ql.exec.vector.VectorExtractRow) SemanticException(org.apache.hadoop.hive.ql.parse.SemanticException) VectorAggregationDesc(org.apache.hadoop.hive.ql.exec.vector.VectorAggregationDesc) VectorizedRowBatchCtx(org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatchCtx) AggregationBuffer(org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator.AggregationBuffer) VectorRandomRowSource(org.apache.hadoop.hive.ql.exec.vector.VectorRandomRowSource)
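
The bookkeeping in doVectorTest is a general pattern: one aggregation buffer per grouping key, plus one extra slot at index maxKeyCount reserved for the NULL key. Below is a Hive-free sketch of the same idea, with the aggregation simplified to a long sum; all names and data here are invented.

import java.util.Arrays;

public class PerKeyBufferSketch {
    public static void main(String[] args) {
        int maxKeyCount = 3;
        // Slots 0..maxKeyCount-1 hold real keys; the last slot is the NULL key.
        long[] sums = new long[maxKeyCount + 1];
        int[] keys = { 0, 1, -1, 1, 2 };    // -1 stands in for a NULL key
        long[] values = { 5, 7, 9, 1, 4 };
        for (int i = 0; i < keys.length; i++) {
            int slot = (keys[i] < 0) ? maxKeyCount : keys[i];
            sums[slot] += values[i];        // aggregate into that key's buffer
        }
        System.out.println(Arrays.toString(sums)); // [5, 8, 4, 9]
    }
}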

Aggregations

VectorizedRowBatchCtx (org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatchCtx): 34 uses
VectorizedRowBatch (org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch): 14 uses
HiveConf (org.apache.hadoop.hive.conf.HiveConf): 12 uses
VectorExtractRow (org.apache.hadoop.hive.ql.exec.vector.VectorExtractRow): 12 uses
VectorRandomRowSource (org.apache.hadoop.hive.ql.exec.vector.VectorRandomRowSource): 12 uses
VectorizationContext (org.apache.hadoop.hive.ql.exec.vector.VectorizationContext): 12 uses
VectorUDFAdaptor (org.apache.hadoop.hive.ql.exec.vector.udf.VectorUDFAdaptor): 11 uses
DataTypePhysicalVariation (org.apache.hadoop.hive.common.type.DataTypePhysicalVariation): 10 uses
TypeInfo (org.apache.hadoop.hive.serde2.typeinfo.TypeInfo): 10 uses
ArrayList (java.util.ArrayList): 9 uses
VectorExpression (org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression): 8 uses
DecimalTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo): 8 uses
PrimitiveTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo): 8 uses
GenericUDF (org.apache.hadoop.hive.ql.udf.generic.GenericUDF): 7 uses
MapWork (org.apache.hadoop.hive.ql.plan.MapWork): 6 uses
VectorRandomBatchSource (org.apache.hadoop.hive.ql.exec.vector.VectorRandomBatchSource): 5 uses
GenerationSpec (org.apache.hadoop.hive.ql.exec.vector.VectorRandomRowSource.GenerationSpec): 5 uses
AcidOutputFormat (org.apache.hadoop.hive.ql.io.AcidOutputFormat): 5 uses
ExprNodeColumnDesc (org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc): 5 uses
ExprNodeDesc (org.apache.hadoop.hive.ql.plan.ExprNodeDesc): 5 uses