
Example 26 with TypeInfoUtils.getStandardWritableObjectInspectorFromTypeInfo

Use of org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils.getStandardWritableObjectInspectorFromTypeInfo in project hive by apache.

The class TestVectorIfStatement, method doVectorIfTest.

private void doVectorIfTest(TypeInfo typeInfo, IfVariation ifVariation, List<String> columns, String[] columnNames, TypeInfo[] typeInfos, DataTypePhysicalVariation[] dataTypePhysicalVariations, List<ExprNodeDesc> children, IfStmtTestMode ifStmtTestMode, ColumnScalarMode columnScalarMode, VectorRandomBatchSource batchSource, Object[] resultObjects) throws Exception {
    final boolean isFilter = ifVariation.isFilter;
    GenericUDF udf;
    switch(ifStmtTestMode) {
        case VECTOR_EXPRESSION:
            udf = new GenericUDFIf();
            break;
        case ADAPTOR_WHEN:
            udf = new GenericUDFWhen();
            break;
        default:
            throw new RuntimeException("Unexpected IF statement test mode " + ifStmtTestMode);
    }
    ExprNodeGenericFuncDesc exprDesc = new ExprNodeGenericFuncDesc(typeInfo, udf, children);
    String ifExprMode = (ifStmtTestMode != IfStmtTestMode.VECTOR_EXPRESSION ? "adaptor" : "good");
    HiveConf hiveConf = new HiveConf();
    hiveConf.setVar(HiveConf.ConfVars.HIVE_VECTORIZED_IF_EXPR_MODE, ifExprMode);
    VectorizationContext vectorizationContext = new VectorizationContext("name", columns, Arrays.asList(typeInfos), Arrays.asList(dataTypePhysicalVariations), hiveConf);
    VectorExpression vectorExpression = vectorizationContext.getVectorExpression(exprDesc, (isFilter ? VectorExpressionDescriptor.Mode.FILTER : VectorExpressionDescriptor.Mode.PROJECTION));
    final TypeInfo outputTypeInfo;
    final ObjectInspector objectInspector;
    if (!isFilter) {
        outputTypeInfo = vectorExpression.getOutputTypeInfo();
        objectInspector = TypeInfoUtils.getStandardWritableObjectInspectorFromTypeInfo(outputTypeInfo);
    } else {
        outputTypeInfo = null;
        objectInspector = null;
    }
    if (ifStmtTestMode == IfStmtTestMode.VECTOR_EXPRESSION && vectorExpression instanceof VectorUDFAdaptor) {
        System.out.println("*NO NATIVE VECTOR EXPRESSION* typeInfo " + typeInfo.toString() + " ifStmtTestMode " + ifStmtTestMode + " ifVariation " + ifVariation + " columnScalarMode " + columnScalarMode + " vectorExpression " + vectorExpression.toString());
    }
    String[] outputScratchTypeNames = vectorizationContext.getScratchColumnTypeNames();
    DataTypePhysicalVariation[] outputDataTypePhysicalVariations = vectorizationContext.getScratchDataTypePhysicalVariations();
    VectorizedRowBatchCtx batchContext = new VectorizedRowBatchCtx(
            columnNames,
            typeInfos,
            dataTypePhysicalVariations,
            /* dataColumnNums */ null,
            /* partitionColumnCount */ 0,
            /* virtualColumnCount */ 0,
            /* neededVirtualColumns */ null,
            outputScratchTypeNames,
            outputDataTypePhysicalVariations);
    VectorizedRowBatch batch = batchContext.createVectorizedRowBatch();
    // System.out.println("*VECTOR EXPRESSION* " + vectorExpression.getClass().getSimpleName());
    /*
    System.out.println(
        "*DEBUG* typeInfo " + typeInfo.toString() +
        " ifStmtTestMode " + ifStmtTestMode +
        " ifVariation " + ifVariation +
        " columnScalarMode " + columnScalarMode +
        " vectorExpression " + vectorExpression.toString());
    */
    VectorExtractRow resultVectorExtractRow = null;
    Object[] scratchRow = null;
    if (!isFilter) {
        resultVectorExtractRow = new VectorExtractRow();
        final int outputColumnNum = vectorExpression.getOutputColumnNum();
        resultVectorExtractRow.init(new TypeInfo[] { outputTypeInfo }, new int[] { outputColumnNum });
        scratchRow = new Object[1];
    }
    boolean copySelectedInUse = false;
    int[] copySelected = new int[VectorizedRowBatch.DEFAULT_SIZE];
    batchSource.resetBatchIteration();
    int rowIndex = 0;
    while (true) {
        if (!batchSource.fillNextBatch(batch)) {
            break;
        }
        final int originalBatchSize = batch.size;
        if (isFilter) {
            copySelectedInUse = batch.selectedInUse;
            if (batch.selectedInUse) {
                System.arraycopy(batch.selected, 0, copySelected, 0, originalBatchSize);
            }
        }
        // In filter mode, the batch size can be made smaller.
        vectorExpression.evaluate(batch);
        if (!isFilter) {
            extractResultObjects(batch, rowIndex, resultVectorExtractRow, scratchRow, objectInspector, resultObjects);
        } else {
            final int currentBatchSize = batch.size;
            if (copySelectedInUse && batch.selectedInUse) {
                int selectIndex = 0;
                for (int i = 0; i < originalBatchSize; i++) {
                    final int originalBatchIndex = copySelected[i];
                    final boolean booleanResult;
                    if (selectIndex < currentBatchSize && batch.selected[selectIndex] == originalBatchIndex) {
                        booleanResult = true;
                        selectIndex++;
                    } else {
                        booleanResult = false;
                    }
                    resultObjects[rowIndex + i] = new BooleanWritable(booleanResult);
                }
            } else if (batch.selectedInUse) {
                int selectIndex = 0;
                for (int i = 0; i < originalBatchSize; i++) {
                    final boolean booleanResult;
                    if (selectIndex < currentBatchSize && batch.selected[selectIndex] == i) {
                        booleanResult = true;
                        selectIndex++;
                    } else {
                        booleanResult = false;
                    }
                    resultObjects[rowIndex + i] = new BooleanWritable(booleanResult);
                }
            } else if (currentBatchSize == 0) {
                // Whole batch got zapped.
                for (int i = 0; i < originalBatchSize; i++) {
                    resultObjects[rowIndex + i] = new BooleanWritable(false);
                }
            } else {
                // Every row kept.
                for (int i = 0; i < originalBatchSize; i++) {
                    resultObjects[rowIndex + i] = new BooleanWritable(true);
                }
            }
        }
        rowIndex += originalBatchSize;
    }
}
Also used : GenericUDFIf(org.apache.hadoop.hive.ql.udf.generic.GenericUDFIf) ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) GenericUDFWhen(org.apache.hadoop.hive.ql.udf.generic.GenericUDFWhen) ExprNodeGenericFuncDesc(org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc) VectorizationContext(org.apache.hadoop.hive.ql.exec.vector.VectorizationContext) VectorUDFAdaptor(org.apache.hadoop.hive.ql.exec.vector.udf.VectorUDFAdaptor) PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo) DecimalTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) VectorExtractRow(org.apache.hadoop.hive.ql.exec.vector.VectorExtractRow) VectorizedRowBatchCtx(org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatchCtx) VectorizedRowBatch(org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch) DataTypePhysicalVariation(org.apache.hadoop.hive.common.type.DataTypePhysicalVariation) GenericUDF(org.apache.hadoop.hive.ql.udf.generic.GenericUDF) BooleanWritable(org.apache.hadoop.io.BooleanWritable) HiveConf(org.apache.hadoop.hive.conf.HiveConf) VectorExpression(org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression)
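
For orientation, here is a minimal standalone sketch (not taken from the Hive sources above) of the core call these examples share: resolve a TypeInfo from a Hive type string, obtain the writable-backed ObjectInspector for it, and read a Hadoop Writable through that inspector. The class name and the sample "int" type are illustrative.

import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
import org.apache.hadoop.io.IntWritable;

public class WritableOiSketch {
    public static void main(String[] args) {
        // Resolve a TypeInfo from a Hive type string, then ask for the
        // matching writable-backed ObjectInspector.
        TypeInfo typeInfo = TypeInfoUtils.getTypeInfoFromTypeString("int");
        ObjectInspector oi = TypeInfoUtils.getStandardWritableObjectInspectorFromTypeInfo(typeInfo);
        // The standard writable OI expects Hadoop Writable instances,
        // not Java primitives or boxed values.
        PrimitiveObjectInspector poi = (PrimitiveObjectInspector) oi;
        Object javaValue = poi.getPrimitiveJavaObject(new IntWritable(42));
        // Prints: 42
        System.out.println(javaValue);
    }
}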

Example 27 with TypeInfoUtils.getStandardWritableObjectInspectorFromTypeInfo

Use of org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils.getStandardWritableObjectInspectorFromTypeInfo in project hive by apache.

The class TestVectorNegative, method doRowArithmeticTest.

private void doRowArithmeticTest(TypeInfo typeInfo, List<String> columns, List<ExprNodeDesc> children, ExprNodeGenericFuncDesc exprDesc, Object[][] randomRows, ObjectInspector rowInspector, TypeInfo outputTypeInfo, Object[] resultObjects) throws Exception {
    /*
    System.out.println(
        "*DEBUG* typeInfo " + typeInfo.toString() +
        " negativeTestMode ROW_MODE" +
        " exprDesc " + exprDesc.toString());
    */
    HiveConf hiveConf = new HiveConf();
    ExprNodeEvaluator evaluator = ExprNodeEvaluatorFactory.get(exprDesc, hiveConf);
    evaluator.initialize(rowInspector);
    ObjectInspector objectInspector = TypeInfoUtils.getStandardWritableObjectInspectorFromTypeInfo(outputTypeInfo);
    final int rowCount = randomRows.length;
    for (int i = 0; i < rowCount; i++) {
        Object[] row = randomRows[i];
        Object result = evaluator.evaluate(row);
        Object copyResult = null;
        try {
            copyResult = ObjectInspectorUtils.copyToStandardObject(result, objectInspector, ObjectInspectorCopyOption.WRITABLE);
        } catch (Exception e) {
            // The failure is swallowed: copyResult stays null and the mismatch
            // surfaces later when the result objects are compared.
            System.out.println("copyToStandardObject failed for row " + i + ": " + e);
        }
        resultObjects[i] = copyResult;
    }
}
Also used : ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) ExprNodeEvaluator(org.apache.hadoop.hive.ql.exec.ExprNodeEvaluator) HiveConf(org.apache.hadoop.hive.conf.HiveConf) SemanticException(org.apache.hadoop.hive.ql.parse.SemanticException) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException)
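
The copyToStandardObject call above can be shown in isolation; this is a small sketch, with the "bigint" type and the literal value made up for illustration.

import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils.ObjectInspectorCopyOption;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
import org.apache.hadoop.io.LongWritable;

public class CopyToStandardObjectSketch {
    public static void main(String[] args) {
        ObjectInspector oi = TypeInfoUtils.getStandardWritableObjectInspectorFromTypeInfo(
                TypeInfoUtils.getTypeInfoFromTypeString("bigint"));
        // Copy an evaluator result into a fresh standard writable object,
        // detaching it from any buffers the evaluator may reuse.
        Object copy = ObjectInspectorUtils.copyToStandardObject(
                new LongWritable(7L), oi, ObjectInspectorCopyOption.WRITABLE);
        // Prints: 7
        System.out.println(copy);
    }
}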

Example 28 with TypeInfoUtils.getStandardWritableObjectInspectorFromTypeInfo

Use of org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils.getStandardWritableObjectInspectorFromTypeInfo in project hive by apache.

The class Vectorizer, method getVectorAggregationDesc.

private static ImmutablePair<VectorAggregationDesc, String> getVectorAggregationDesc(AggregationDesc aggrDesc, VectorizationContext vContext) throws HiveException {
    String aggregateName = aggrDesc.getGenericUDAFName();
    List<ExprNodeDesc> parameterList = aggrDesc.getParameters();
    final int parameterCount = parameterList.size();
    final GenericUDAFEvaluator.Mode udafEvaluatorMode = aggrDesc.getMode();
    /*
     * Look at evaluator to get output type info.
     */
    GenericUDAFEvaluator evaluator = aggrDesc.getGenericUDAFEvaluator();
    ObjectInspector[] parameterObjectInspectors = new ObjectInspector[parameterCount];
    for (int i = 0; i < parameterCount; i++) {
        TypeInfo typeInfo = parameterList.get(i).getTypeInfo();
        parameterObjectInspectors[i] = TypeInfoUtils.getStandardWritableObjectInspectorFromTypeInfo(typeInfo);
    }
    // The only way to get the return object inspector (and its return type) is to
    // initialize it...
    ObjectInspector returnOI = evaluator.init(aggrDesc.getMode(), parameterObjectInspectors);
    final TypeInfo outputTypeInfo = TypeInfoUtils.getTypeInfoFromTypeString(returnOI.getTypeName());
    return getVectorAggregationDesc(aggregateName, parameterList, evaluator, outputTypeInfo, udafEvaluatorMode, vContext);
}
Also used : StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector) ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) MapTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo) ListTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) StructTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo) DecimalTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo) PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo)
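
The same init-to-discover-the-return-type pattern can be sketched outside the Vectorizer. This is a hedged sketch, not a definitive recipe: it assumes the built-in "sum" UDAF resolves through FunctionRegistry without a full Hive session, and the class name is illustrative.

import java.util.Arrays;
import org.apache.hadoop.hive.ql.exec.FunctionRegistry;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;

public class UdafReturnTypeSketch {
    public static void main(String[] args) throws Exception {
        // Writable OI for the single bigint argument of the built-in sum().
        ObjectInspector argOI = TypeInfoUtils.getStandardWritableObjectInspectorFromTypeInfo(
                TypeInfoUtils.getTypeInfoFromTypeString("bigint"));
        GenericUDAFEvaluator evaluator = FunctionRegistry.getGenericUDAFEvaluator(
                "sum", Arrays.asList(argOI), false, false);
        // As in getVectorAggregationDesc: the only way to learn the return
        // type is to init() the evaluator and inspect the OI it hands back.
        ObjectInspector returnOI = evaluator.init(
                GenericUDAFEvaluator.Mode.COMPLETE, new ObjectInspector[] { argOI });
        TypeInfo outputTypeInfo = TypeInfoUtils.getTypeInfoFromTypeString(returnOI.getTypeName());
        // Prints: bigint
        System.out.println(outputTypeInfo);
    }
}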

Example 29 with TypeInfoUtils.getStandardWritableObjectInspectorFromTypeInfo

Use of org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils.getStandardWritableObjectInspectorFromTypeInfo in project hive by apache.

The class FakeCaptureVectorToRowOutputOperator, method initializeOp.

@Override
public void initializeOp(Configuration conf) throws HiveException {
    super.initializeOp(conf);
    VectorizationContextRegion vectorizationContextRegion = (VectorizationContextRegion) op;
    VectorizationContext outputVectorizationContext = vectorizationContextRegion.getOutputVectorizationContext();
    outputTypeInfos = outputVectorizationContext.getInitialTypeInfos();
    final int outputLength = outputTypeInfos.length;
    outputObjectInspectors = new ObjectInspector[outputLength];
    for (int i = 0; i < outputLength; i++) {
        TypeInfo typeInfo = outputTypeInfos[i];
        outputObjectInspectors[i] = TypeInfoUtils.getStandardWritableObjectInspectorFromTypeInfo(typeInfo);
    }
    vectorExtractRow = new VectorExtractRow();
    vectorExtractRow.init(outputTypeInfos);
}
Also used : VectorizationContext(org.apache.hadoop.hive.ql.exec.vector.VectorizationContext) StructTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo) PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) VectorExtractRow(org.apache.hadoop.hive.ql.exec.vector.VectorExtractRow) VectorizationContextRegion(org.apache.hadoop.hive.ql.exec.vector.VectorizationContextRegion)

Example 30 with TypeInfoUtils.getStandardWritableObjectInspectorFromTypeInfo

Use of org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils.getStandardWritableObjectInspectorFromTypeInfo in project hive by apache.

The class HBaseUtils, method deserializePartitionKey.

// Deserialize a partition key and return _only_ the partition values.
private static List<String> deserializePartitionKey(List<FieldSchema> partitions, byte[] key, Configuration conf) {
    StringBuilder names = new StringBuilder();
    names.append("dbName,tableName,");
    StringBuilder types = new StringBuilder();
    types.append("string,string,");
    for (int i = 0; i < partitions.size(); i++) {
        names.append(partitions.get(i).getName());
        types.append(TypeInfoUtils.getTypeInfoFromTypeString(partitions.get(i).getType()));
        if (i != partitions.size() - 1) {
            names.append(",");
            types.append(",");
        }
    }
    BinarySortableSerDe serDe = new BinarySortableSerDe();
    Properties props = new Properties();
    props.setProperty(serdeConstants.LIST_COLUMNS, names.toString());
    props.setProperty(serdeConstants.LIST_COLUMN_TYPES, types.toString());
    try {
        serDe.initialize(conf, props);
        List<?> deserializedKeys = ((List<?>) serDe.deserialize(new BytesWritable(key))).subList(2, partitions.size() + 2);
        List<String> partitionKeys = new ArrayList<String>();
        for (int i = 0; i < deserializedKeys.size(); i++) {
            Object deserializedKey = deserializedKeys.get(i);
            if (deserializedKey == null) {
                partitionKeys.add(HiveConf.getVar(conf, HiveConf.ConfVars.DEFAULTPARTITIONNAME));
            } else {
                TypeInfo inputType = TypeInfoUtils.getTypeInfoFromTypeString(partitions.get(i).getType());
                ObjectInspector inputOI = TypeInfoUtils.getStandardWritableObjectInspectorFromTypeInfo(inputType);
                Converter converter = ObjectInspectorConverters.getConverter(inputOI, PrimitiveObjectInspectorFactory.javaStringObjectInspector);
                partitionKeys.add((String) converter.convert(deserializedKey));
            }
        }
        return partitionKeys;
    } catch (SerDeException e) {
        throw new RuntimeException("Error when deserialize key", e);
    }
}
Also used : ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) BinarySortableSerDe(org.apache.hadoop.hive.serde2.binarysortable.BinarySortableSerDe) ArrayList(java.util.ArrayList) BytesWritable(org.apache.hadoop.io.BytesWritable) ByteString(com.google.protobuf.ByteString) Properties(java.util.Properties) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) Converter(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters.Converter) List(java.util.List) ArrayList(java.util.ArrayList) SerDeException(org.apache.hadoop.hive.serde2.SerDeException)
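
The converter step at the heart of deserializePartitionKey also works in isolation: build the writable OI for the stored type, get a converter targeting the Java String OI, and convert a Writable value. The "int" type and the sample value below are made up for illustration.

import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters.Converter;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
import org.apache.hadoop.io.IntWritable;

public class PartitionValueToStringSketch {
    public static void main(String[] args) {
        // Writable OI for the partition column's declared type.
        TypeInfo inputType = TypeInfoUtils.getTypeInfoFromTypeString("int");
        ObjectInspector inputOI =
                TypeInfoUtils.getStandardWritableObjectInspectorFromTypeInfo(inputType);
        // Converter from the writable representation to a plain Java String.
        Converter converter = ObjectInspectorConverters.getConverter(
                inputOI, PrimitiveObjectInspectorFactory.javaStringObjectInspector);
        String s = (String) converter.convert(new IntWritable(2024));
        // Prints: 2024
        System.out.println(s);
    }
}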

Aggregations

ObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector): 44 uses
TypeInfo (org.apache.hadoop.hive.serde2.typeinfo.TypeInfo): 31 uses
ArrayList (java.util.ArrayList): 22 uses
PrimitiveTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo): 17 uses
HiveException (org.apache.hadoop.hive.ql.metadata.HiveException): 13 uses
HiveConf (org.apache.hadoop.hive.conf.HiveConf): 12 uses
DecimalTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo): 11 uses
ExprNodeEvaluator (org.apache.hadoop.hive.ql.exec.ExprNodeEvaluator): 10 uses
ExprNodeDesc (org.apache.hadoop.hive.ql.plan.ExprNodeDesc): 10 uses
ExprNodeGenericFuncDesc (org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc): 9 uses
StructObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector): 9 uses
DataTypePhysicalVariation (org.apache.hadoop.hive.common.type.DataTypePhysicalVariation): 8 uses
ExprNodeColumnDesc (org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc): 8 uses
GenericUDF (org.apache.hadoop.hive.ql.udf.generic.GenericUDF): 8 uses
VectorRandomBatchSource (org.apache.hadoop.hive.ql.exec.vector.VectorRandomBatchSource): 7 uses
VectorRandomRowSource (org.apache.hadoop.hive.ql.exec.vector.VectorRandomRowSource): 7 uses
GenerationSpec (org.apache.hadoop.hive.ql.exec.vector.VectorRandomRowSource.GenerationSpec): 7 uses
SemanticException (org.apache.hadoop.hive.ql.parse.SemanticException): 7 uses
PrimitiveObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector): 6 uses
CharTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo): 6 uses