Example 1 with GenericUDFElt

Use of org.apache.hadoop.hive.ql.udf.generic.GenericUDFElt in project hive by apache.

In the class TestVectorCoalesceElt, the method doCoalesceOnRandomDataType builds a random COALESCE or ELT expression over generated rows and checks that row-mode, adaptor, and vectorized evaluation all return the same results. COALESCE(v1, v2, ...) returns its first non-NULL argument; ELT(n, str1, str2, ...) returns the n-th string argument (1-based), or NULL when n is out of range.

private boolean doCoalesceOnRandomDataType(Random random, int iteration, boolean isCoalesce, boolean isEltIndexConst, int columnCount, int[] constantColumns, int[] nullConstantColumns, boolean allowNulls) throws Exception {
    String typeName;
    if (isCoalesce) {
        typeName = VectorRandomRowSource.getRandomTypeName(
            random, SupportedTypes.PRIMITIVES, /* allowedTypeNameSet */ null);
        typeName = VectorRandomRowSource.getDecoratedTypeName(
            random, typeName, SupportedTypes.PRIMITIVES,
            /* allowedTypeNameSet */ null, /* depth */ 0, /* maxDepth */ 2);
    } else {
        // ELT only chooses between STRINGs.
        typeName = "string";
    }
    TypeInfo typeInfo = TypeInfoUtils.getTypeInfoFromTypeString(typeName);
    // ----------------------------------------------------------------------------------------------
    final TypeInfo intTypeInfo;
    ObjectInspector intObjectInspector;
    if (isCoalesce) {
        intTypeInfo = null;
        intObjectInspector = null;
    } else {
        intTypeInfo = TypeInfoFactory.intTypeInfo;
        intObjectInspector = TypeInfoUtils.getStandardWritableObjectInspectorFromTypeInfo(intTypeInfo);
    }
    ObjectInspector objectInspector = TypeInfoUtils.getStandardWritableObjectInspectorFromTypeInfo(typeInfo);
    // ----------------------------------------------------------------------------------------------
    List<GenerationSpec> generationSpecList = new ArrayList<GenerationSpec>();
    List<DataTypePhysicalVariation> explicitDataTypePhysicalVariationList = new ArrayList<DataTypePhysicalVariation>();
    List<String> columns = new ArrayList<String>();
    List<ExprNodeDesc> children = new ArrayList<ExprNodeDesc>();
    int columnNum = 1;
    if (!isCoalesce) {
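        // For ELT, the first child is the 1-based index argument. The candidate
        // index values -1 .. columnCount + 1 deliberately include out-of-range
        // values, for which ELT returns NULL.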
        List<Object> intValueList = new ArrayList<Object>();
        for (int i = -1; i < columnCount + 2; i++) {
            intValueList.add(new IntWritable(i));
        }
        final int intValueListCount = intValueList.size();
        ExprNodeDesc intColExpr;
        if (!isEltIndexConst) {
            generationSpecList.add(GenerationSpec.createValueList(intTypeInfo, intValueList));
            explicitDataTypePhysicalVariationList.add(DataTypePhysicalVariation.NONE);
            String columnName = "col" + columnNum++;
            columns.add(columnName);
            intColExpr = new ExprNodeColumnDesc(intTypeInfo, columnName, "table", false);
        } else {
            final Object scalarObject;
            if (random.nextInt(10) != 0) {
                scalarObject = intValueList.get(random.nextInt(intValueListCount));
            } else {
                scalarObject = null;
            }
            intColExpr = new ExprNodeConstantDesc(intTypeInfo, scalarObject);
        }
        children.add(intColExpr);
    }
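    // Build the COALESCE/ELT value arguments: each of the columnCount children is
    // either a generated column, a random non-NULL constant, or a NULL constant,
    // as selected by constantColumns and nullConstantColumns.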
    for (int c = 0; c < columnCount; c++) {
        ExprNodeDesc colExpr;
        if (!contains(constantColumns, c)) {
            generationSpecList.add(GenerationSpec.createSameType(typeInfo));
            explicitDataTypePhysicalVariationList.add(DataTypePhysicalVariation.NONE);
            String columnName = "col" + columnNum++;
            columns.add(columnName);
            colExpr = new ExprNodeColumnDesc(typeInfo, columnName, "table", false);
        } else {
            final Object scalarObject;
            if (!contains(nullConstantColumns, c)) {
                scalarObject = VectorRandomRowSource.randomPrimitiveObject(random, (PrimitiveTypeInfo) typeInfo);
            } else {
                scalarObject = null;
            }
            colExpr = new ExprNodeConstantDesc(typeInfo, scalarObject);
        }
        children.add(colExpr);
    }
    VectorRandomRowSource rowSource = new VectorRandomRowSource();
    rowSource.initGenerationSpecSchema(
        random, generationSpecList, /* maxComplexDepth */ 0,
        /* allowNull */ allowNulls, /* isUnicodeOk */ true,
        explicitDataTypePhysicalVariationList);
    String[] columnNames = columns.toArray(new String[0]);
    Object[][] randomRows = rowSource.randomRows(100000);
    VectorRandomBatchSource batchSource = VectorRandomBatchSource.createInterestingBatches(random, rowSource, randomRows, null);
    final GenericUDF udf = (isCoalesce ? new GenericUDFCoalesce() : new GenericUDFElt());
    final int start = isCoalesce ? 0 : 1;
    final int end = start + columnCount;
    ObjectInspector[] argumentOIs = new ObjectInspector[end];
    if (!isCoalesce) {
        argumentOIs[0] = intObjectInspector;
    }
    for (int i = start; i < end; i++) {
        argumentOIs[i] = objectInspector;
    }
    final ObjectInspector outputObjectInspector = udf.initialize(argumentOIs);
    TypeInfo outputTypeInfo = TypeInfoUtils.getTypeInfoFromObjectInspector(outputObjectInspector);
    ExprNodeGenericFuncDesc exprDesc = new ExprNodeGenericFuncDesc(typeInfo, udf, children);
    final int rowCount = randomRows.length;
    Object[][] resultObjectsArray = new Object[CoalesceEltTestMode.count][];
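    // Evaluate once per test mode; ROW_MODE results become the reference that the
    // ADAPTOR and VECTOR_EXPRESSION results are compared against below.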
    for (int i = 0; i < CoalesceEltTestMode.count; i++) {
        Object[] resultObjects = new Object[rowCount];
        resultObjectsArray[i] = resultObjects;
        CoalesceEltTestMode coalesceEltTestMode = CoalesceEltTestMode.values()[i];
        switch(coalesceEltTestMode) {
            case ROW_MODE:
                if (!doRowCastTest(typeInfo, columns, children, udf, exprDesc, randomRows, rowSource.rowStructObjectInspector(), exprDesc.getWritableObjectInspector(), resultObjects)) {
                    return false;
                }
                break;
            case ADAPTOR:
            case VECTOR_EXPRESSION:
                if (!doVectorCastTest(typeInfo, iteration, columns, columnNames, rowSource.typeInfos(), rowSource.dataTypePhysicalVariations(), children, udf, exprDesc, coalesceEltTestMode, batchSource, exprDesc.getWritableObjectInspector(), outputTypeInfo, resultObjects)) {
                    return false;
                }
                break;
            default:
                throw new RuntimeException("Unexpected COALESCE/ELT test mode " + coalesceEltTestMode);
        }
    }
    for (int i = 0; i < rowCount; i++) {
        // Row-mode is the expected value.
        Object expectedResult = resultObjectsArray[0][i];
        for (int v = 1; v < CoalesceEltTestMode.count; v++) {
            Object vectorResult = resultObjectsArray[v][i];
            CoalesceEltTestMode coalesceEltTestMode = CoalesceEltTestMode.values()[v];
            if (expectedResult == null || vectorResult == null) {
                if (expectedResult != null || vectorResult != null) {
                    Assert.fail("Row " + i + " sourceTypeName " + typeName + " " + coalesceEltTestMode + " iteration " + iteration + " result is NULL " + (vectorResult == null ? "YES" : "NO result " + vectorResult.toString()) + " does not match row-mode expected result is NULL " + (expectedResult == null ? "YES" : "NO result '" + expectedResult.toString()) + "'" + " row values " + Arrays.toString(randomRows[i]) + " exprDesc " + exprDesc.toString());
                }
            } else {
                if (!expectedResult.equals(vectorResult)) {
                    Assert.fail("Row " + i + " sourceTypeName " + typeName + " " + coalesceEltTestMode + " iteration " + iteration + " result '" + vectorResult.toString() + "'" + " (" + vectorResult.getClass().getSimpleName() + ")" + " does not match row-mode expected result '" + expectedResult.toString() + "'" + " (" + expectedResult.getClass().getSimpleName() + ")" + " row values " + Arrays.toString(randomRows[i]) + " exprDesc " + exprDesc.toString());
                }
            }
        }
    }
    return true;
}
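
For reference, below is a minimal, self-contained row-mode sketch of the GenericUDFElt API that the ROW_MODE branch above ultimately exercises: the UDF is initialized with writable object inspectors and then evaluated against deferred arguments. The class name EltRowModeSketch and the literal argument values are illustrative only and do not come from the Hive sources.

import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDF.DeferredJavaObject;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDF.DeferredObject;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFElt;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;

// Illustrative sketch, not part of TestVectorCoalesceElt.
public class EltRowModeSketch {
    public static void main(String[] args) throws HiveException {
        GenericUDFElt udf = new GenericUDFElt();

        // ELT takes an int index followed by string arguments.
        ObjectInspector[] argOIs = new ObjectInspector[] {
            PrimitiveObjectInspectorFactory.writableIntObjectInspector,
            PrimitiveObjectInspectorFactory.writableStringObjectInspector,
            PrimitiveObjectInspectorFactory.writableStringObjectInspector
        };
        udf.initialize(argOIs);

        // ELT(2, 'a', 'b') selects the second string argument.
        DeferredObject[] row = new DeferredObject[] {
            new DeferredJavaObject(new IntWritable(2)),
            new DeferredJavaObject(new Text("a")),
            new DeferredJavaObject(new Text("b"))
        };
        System.out.println(udf.evaluate(row)); // b

        // An out-of-range index yields NULL, one of the cases the intValueList
        // above covers by including -1, 0, and columnCount + 1.
        row[0] = new DeferredJavaObject(new IntWritable(5));
        System.out.println(udf.evaluate(row)); // null
    }
}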
Also used:
ArrayList (java.util.ArrayList)
DataTypePhysicalVariation (org.apache.hadoop.hive.common.type.DataTypePhysicalVariation)
ExprNodeColumnDesc (org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc)
ExprNodeConstantDesc (org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc)
ExprNodeDesc (org.apache.hadoop.hive.ql.plan.ExprNodeDesc)
ExprNodeGenericFuncDesc (org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc)
GenerationSpec (org.apache.hadoop.hive.ql.exec.vector.VectorRandomRowSource.GenerationSpec)
GenericUDF (org.apache.hadoop.hive.ql.udf.generic.GenericUDF)
GenericUDFCoalesce (org.apache.hadoop.hive.ql.udf.generic.GenericUDFCoalesce)
GenericUDFElt (org.apache.hadoop.hive.ql.udf.generic.GenericUDFElt)
IntWritable (org.apache.hadoop.io.IntWritable)
ObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector)
PrimitiveTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo)
TypeInfo (org.apache.hadoop.hive.serde2.typeinfo.TypeInfo)
VectorRandomBatchSource (org.apache.hadoop.hive.ql.exec.vector.VectorRandomBatchSource)
VectorRandomRowSource (org.apache.hadoop.hive.ql.exec.vector.VectorRandomRowSource)
