Examples with TypeInfoFactory.stringTypeInfo - org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory.stringTypeInfo

Example 36 with TypeInfoFactory.stringTypeInfo

use of org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory.stringTypeInfo in project hive by apache.

the class TestGenericUDFAesEncrypt method testAesEnc128ConstStr.

@Test
public void testAesEnc128ConstStr() throws HiveException {
    GenericUDFAesEncrypt udf = new GenericUDFAesEncrypt();
    ObjectInspector valueOI0 = PrimitiveObjectInspectorFactory.writableStringObjectInspector;
    Text keyWr = new Text("1234567890123456");
    ObjectInspector valueOI1 = PrimitiveObjectInspectorFactory.getPrimitiveWritableConstantObjectInspector(TypeInfoFactory.stringTypeInfo, keyWr);
    ObjectInspector[] arguments = { valueOI0, valueOI1 };
    udf.initialize(arguments);
    runAndVerifyStr("ABC", keyWr, "y6Ss+zCYObpCbgfWfyNWTw==", udf);
    runAndVerifyStr("", keyWr, "BQGHoM3lqYcsurCRq3PlUw==", udf);
    // null
    runAndVerifyStr(null, keyWr, null, udf);
}

Also used : ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) Text(org.apache.hadoop.io.Text) Test(org.junit.Test)

Example 37 with TypeInfoFactory.stringTypeInfo

use of org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory.stringTypeInfo in project hive by apache.

the class TestVectorStringConcat method doVectorStringConcatTest.

private void doVectorStringConcatTest(TypeInfo stringTypeInfo1, TypeInfo stringTypeInfo2, List<String> columns, TypeInfo[] typeInfos, List<ExprNodeDesc> children, StringConcatTestMode stringConcatTestMode, ColumnScalarMode columnScalarMode, VectorRandomBatchSource batchSource, VectorizedRowBatchCtx batchContext, ObjectInspector rowInspector, GenericUDF genericUdf, Object[] resultObjects) throws Exception {
    HiveConf hiveConf = new HiveConf();
    if (stringConcatTestMode == StringConcatTestMode.ADAPTOR) {
        hiveConf.setBoolVar(HiveConf.ConfVars.HIVE_TEST_VECTOR_ADAPTOR_OVERRIDE, true);
    }
    DataTypePhysicalVariation[] dataTypePhysicalVariations = new DataTypePhysicalVariation[2];
    Arrays.fill(dataTypePhysicalVariations, DataTypePhysicalVariation.NONE);
    ExprNodeGenericFuncDesc exprDesc = new ExprNodeGenericFuncDesc(TypeInfoFactory.stringTypeInfo, genericUdf, children);
    // ---------------------------------------
    // Just so we can get the output type...
    ExprNodeEvaluator evaluator = ExprNodeEvaluatorFactory.get(exprDesc, hiveConf);
    evaluator.initialize(rowInspector);
    ObjectInspector objectInspector = evaluator.getOutputOI();
    TypeInfo outputTypeInfo = TypeInfoUtils.getTypeInfoFromObjectInspector(objectInspector);
    /*
     * Again with correct output type...
     */
    exprDesc = new ExprNodeGenericFuncDesc(outputTypeInfo, genericUdf, children);
    // ---------------------------------------
    VectorizationContext vectorizationContext = new VectorizationContext("name", columns, Arrays.asList(typeInfos), Arrays.asList(dataTypePhysicalVariations), hiveConf);
    VectorExpression vectorExpression = vectorizationContext.getVectorExpression(exprDesc);
    vectorExpression.transientInit(hiveConf);
    if (stringConcatTestMode == StringConcatTestMode.VECTOR_EXPRESSION && vectorExpression instanceof VectorUDFAdaptor) {
        System.out.println("*NO NATIVE VECTOR EXPRESSION* stringTypeInfo1 " + stringTypeInfo1.toString() + " stringTypeInfo2 " + stringTypeInfo2.toString() + " stringConcatTestMode " + stringConcatTestMode + " columnScalarMode " + columnScalarMode + " vectorExpression " + vectorExpression.toString());
    }
    VectorizedRowBatch batch = batchContext.createVectorizedRowBatch();
    VectorExtractRow resultVectorExtractRow = new VectorExtractRow();
    resultVectorExtractRow.init(new TypeInfo[] { outputTypeInfo }, new int[] { columns.size() });
    Object[] scrqtchRow = new Object[1];
    // System.out.println("*VECTOR EXPRESSION* " + vectorExpression.getClass().getSimpleName());
    /*
    System.out.println(
        "*DEBUG* stringTypeInfo1 " + stringTypeInfo1.toString() +
        " stringTypeInfo2 " + stringTypeInfo2.toString() +
        " stringConcatTestMode " + stringConcatTestMode +
        " columnScalarMode " + columnScalarMode +
        " vectorExpression " + vectorExpression.toString());
    */
    batchSource.resetBatchIteration();
    int rowIndex = 0;
    while (true) {
        if (!batchSource.fillNextBatch(batch)) {
            break;
        }
        vectorExpression.evaluate(batch);
        extractResultObjects(batch, rowIndex, resultVectorExtractRow, scrqtchRow, objectInspector, resultObjects);
        rowIndex += batch.size;
    }
}

Also used : ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) ExprNodeEvaluator(org.apache.hadoop.hive.ql.exec.ExprNodeEvaluator) ExprNodeGenericFuncDesc(org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc) VectorizationContext(org.apache.hadoop.hive.ql.exec.vector.VectorizationContext) VectorUDFAdaptor(org.apache.hadoop.hive.ql.exec.vector.udf.VectorUDFAdaptor) PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo) VarcharTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo) DecimalTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) CharTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo) VectorExtractRow(org.apache.hadoop.hive.ql.exec.vector.VectorExtractRow) VectorizedRowBatch(org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch) DataTypePhysicalVariation(org.apache.hadoop.hive.common.type.DataTypePhysicalVariation) HiveConf(org.apache.hadoop.hive.conf.HiveConf) VectorExpression(org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression)

Example 38 with TypeInfoFactory.stringTypeInfo

use of org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory.stringTypeInfo in project hive by apache.

the class TestVectorSubStr method doTests.

private void doTests(Random random, boolean useLength) throws Exception {
    String typeName = "string";
    TypeInfo typeInfo = TypeInfoFactory.stringTypeInfo;
    TypeInfo targetTypeInfo = typeInfo;
    String functionName = "substr";
    List<GenerationSpec> generationSpecList = new ArrayList<GenerationSpec>();
    List<DataTypePhysicalVariation> explicitDataTypePhysicalVariationList = new ArrayList<DataTypePhysicalVariation>();
    List<String> columns = new ArrayList<String>();
    int columnNum = 1;
    ExprNodeDesc col1Expr;
    StringGenerationOption stringGenerationOption = new StringGenerationOption(true, true);
    generationSpecList.add(GenerationSpec.createStringFamily(typeInfo, stringGenerationOption));
    explicitDataTypePhysicalVariationList.add(DataTypePhysicalVariation.NONE);
    String columnName = "col" + (columnNum++);
    col1Expr = new ExprNodeColumnDesc(typeInfo, columnName, "table", false);
    columns.add(columnName);
    VectorRandomRowSource rowSource = new VectorRandomRowSource();
    rowSource.initGenerationSpecSchema(random, generationSpecList, /* maxComplexDepth */
    0, /* allowNull */
    true, /* isUnicodeOk */
    true, explicitDataTypePhysicalVariationList);
    List<ExprNodeDesc> children = new ArrayList<ExprNodeDesc>();
    children.add(col1Expr);
    final int position = 10 - random.nextInt(21);
    Object scalar2Object = Integer.valueOf(position);
    ExprNodeDesc col2Expr = new ExprNodeConstantDesc(TypeInfoFactory.intTypeInfo, scalar2Object);
    children.add(col2Expr);
    if (useLength) {
        Object scalar3Object = random.nextInt(12);
        ExprNodeDesc col3Expr = new ExprNodeConstantDesc(TypeInfoFactory.intTypeInfo, scalar3Object);
        children.add(col3Expr);
    }
    // ----------------------------------------------------------------------------------------------
    String[] columnNames = columns.toArray(new String[0]);
    String[] outputScratchTypeNames = new String[] { targetTypeInfo.getTypeName() };
    DataTypePhysicalVariation[] outputDataTypePhysicalVariations = new DataTypePhysicalVariation[] { DataTypePhysicalVariation.NONE };
    VectorizedRowBatchCtx batchContext = new VectorizedRowBatchCtx(columnNames, rowSource.typeInfos(), rowSource.dataTypePhysicalVariations(), /* dataColumnNums */
    null, /* partitionColumnCount */
    0, /* virtualColumnCount */
    0, /* neededVirtualColumns */
    null, outputScratchTypeNames, outputDataTypePhysicalVariations);
    Object[][] randomRows = rowSource.randomRows(100000);
    VectorRandomBatchSource batchSource = VectorRandomBatchSource.createInterestingBatches(random, rowSource, randomRows, null);
    GenericUDF genericUdf;
    FunctionInfo funcInfo = null;
    try {
        funcInfo = FunctionRegistry.getFunctionInfo(functionName);
    } catch (SemanticException e) {
        Assert.fail("Failed to load " + functionName + " " + e);
    }
    genericUdf = funcInfo.getGenericUDF();
    final int rowCount = randomRows.length;
    Object[][] resultObjectsArray = new Object[SubStrTestMode.count][];
    for (int i = 0; i < SubStrTestMode.count; i++) {
        Object[] resultObjects = new Object[rowCount];
        resultObjectsArray[i] = resultObjects;
        SubStrTestMode subStrTestMode = SubStrTestMode.values()[i];
        switch(subStrTestMode) {
            case ROW_MODE:
                doRowIfTest(typeInfo, targetTypeInfo, columns, children, randomRows, rowSource.rowStructObjectInspector(), genericUdf, resultObjects);
                break;
            case ADAPTOR:
            case VECTOR_EXPRESSION:
                doVectorIfTest(typeInfo, targetTypeInfo, columns, rowSource.typeInfos(), rowSource.dataTypePhysicalVariations(), children, subStrTestMode, batchSource, batchContext, genericUdf, resultObjects);
                break;
            default:
                throw new RuntimeException("Unexpected STRING Unary test mode " + subStrTestMode);
        }
    }
    for (int i = 0; i < rowCount; i++) {
        // Row-mode is the expected value.
        Object expectedResult = resultObjectsArray[0][i];
        for (int v = 1; v < SubStrTestMode.count; v++) {
            Object vectorResult = resultObjectsArray[v][i];
            if (expectedResult == null || vectorResult == null) {
                if (expectedResult != null || vectorResult != null) {
                    Assert.fail("Row " + i + " " + SubStrTestMode.values()[v] + " result is NULL " + (vectorResult == null ? "YES" : "NO result " + vectorResult.toString()) + " does not match row-mode expected result is NULL " + (expectedResult == null ? "YES" : "NO result " + expectedResult.toString()) + " row values " + Arrays.toString(randomRows[i]));
                }
            } else {
                if (!expectedResult.equals(vectorResult)) {
                    Assert.fail("Row " + i + " " + SubStrTestMode.values()[v] + " result " + vectorResult.toString() + " (" + vectorResult.getClass().getSimpleName() + ")" + " does not match row-mode expected result " + expectedResult.toString() + " (" + expectedResult.getClass().getSimpleName() + ")" + " row values " + Arrays.toString(randomRows[i]));
                }
            }
        }
    }
}

Also used : ArrayList(java.util.ArrayList) DataTypePhysicalVariation(org.apache.hadoop.hive.common.type.DataTypePhysicalVariation) ExprNodeColumnDesc(org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) SemanticException(org.apache.hadoop.hive.ql.parse.SemanticException) ExprNodeConstantDesc(org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc) VectorRandomBatchSource(org.apache.hadoop.hive.ql.exec.vector.VectorRandomBatchSource) FunctionInfo(org.apache.hadoop.hive.ql.exec.FunctionInfo) PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo) DecimalTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) GenerationSpec(org.apache.hadoop.hive.ql.exec.vector.VectorRandomRowSource.GenerationSpec) VectorizedRowBatchCtx(org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatchCtx) GenericUDF(org.apache.hadoop.hive.ql.udf.generic.GenericUDF) StringGenerationOption(org.apache.hadoop.hive.ql.exec.vector.VectorRandomRowSource.StringGenerationOption) VectorRandomRowSource(org.apache.hadoop.hive.ql.exec.vector.VectorRandomRowSource)

Example 39 with TypeInfoFactory.stringTypeInfo

use of org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory.stringTypeInfo in project hive by apache.

the class TestVectorGenericDateExpressions method testToDate.

@Test
public void testToDate() throws HiveException {
    for (PrimitiveCategory type : Arrays.asList(PrimitiveCategory.TIMESTAMP, PrimitiveCategory.STRING)) {
        LongColumnVector date = newRandomLongColumnVector(10000, size);
        LongColumnVector output = new LongColumnVector(size);
        VectorizedRowBatch batch = new VectorizedRowBatch(2, size);
        batch.cols[0] = castTo(date, type);
        batch.cols[1] = output;
        validateToDate(batch, type, date);
        TestVectorizedRowBatch.addRandomNulls(date);
        batch.cols[0] = castTo(date, type);
        validateToDate(batch, type, date);
    }
    VectorExpression udf = new CastStringToDate(0, 1);
    udf.setInputTypeInfos(new TypeInfo[] { TypeInfoFactory.stringTypeInfo });
    udf.transientInit(hiveConf);
    VectorizedRowBatch batch = new VectorizedRowBatch(2, 1);
    batch.cols[0] = new BytesColumnVector(1);
    batch.cols[1] = new LongColumnVector(1);
    BytesColumnVector bcv = (BytesColumnVector) batch.cols[0];
    byte[] bytes = "error".getBytes(utf8);
    bcv.vector[0] = bytes;
    bcv.start[0] = 0;
    bcv.length[0] = bytes.length;
    udf.evaluate(batch);
    Assert.assertEquals(batch.cols[1].isNull[0], true);
}

Also used : VectorizedRowBatch(org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch) TestVectorizedRowBatch(org.apache.hadoop.hive.ql.exec.vector.TestVectorizedRowBatch) BytesColumnVector(org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector) PrimitiveCategory(org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory) LongColumnVector(org.apache.hadoop.hive.ql.exec.vector.LongColumnVector) Test(org.junit.Test)

Example 40 with TypeInfoFactory.stringTypeInfo

use of org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory.stringTypeInfo in project hive by apache.

the class TestVectorGenericDateExpressions method testDateAddColScalar.

@Test
public void testDateAddColScalar() throws HiveException {
    for (PrimitiveCategory colType1 : dateTimestampStringTypes) testDateAddColScalar(colType1, true);
    VectorExpression udf = new VectorUDFDateAddColScalar(0, 0, 1);
    udf.setInputTypeInfos(new TypeInfo[] { TypeInfoFactory.stringTypeInfo, TypeInfoFactory.timestampTypeInfo });
    udf.transientInit(hiveConf);
    VectorizedRowBatch batch = new VectorizedRowBatch(2, 1);
    batch.cols[0] = new BytesColumnVector(1);
    batch.cols[1] = new LongColumnVector(1);
    BytesColumnVector bcv = (BytesColumnVector) batch.cols[0];
    byte[] bytes = "error".getBytes(utf8);
    bcv.vector[0] = bytes;
    bcv.start[0] = 0;
    bcv.length[0] = bytes.length;
    udf.evaluate(batch);
    Assert.assertEquals(batch.cols[1].isNull[0], true);
}

Aggregations

Test (org.junit.Test)65 TypeInfo (org.apache.hadoop.hive.serde2.typeinfo.TypeInfo)44 ObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector)36 Text (org.apache.hadoop.io.Text)34 ArrayList (java.util.ArrayList)19 HiveConf (org.apache.hadoop.hive.conf.HiveConf)19 ExprNodeDesc (org.apache.hadoop.hive.ql.plan.ExprNodeDesc)17 ExprNodeColumnDesc (org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc)16 ExprNodeGenericFuncDesc (org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc)16 VectorizedRowBatch (org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch)14 ExprNodeConstantDesc (org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc)14 SmallTableGenerationParameters (org.apache.hadoop.hive.ql.exec.vector.mapjoin.MapJoinTestDescription.SmallTableGenerationParameters)13 DecimalTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo)13 PrimitiveCategory (org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory)12 PrimitiveTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo)12 Properties (java.util.Properties)11 BytesColumnVector (org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector)11 LongColumnVector (org.apache.hadoop.hive.ql.exec.vector.LongColumnVector)11 TestVectorizedRowBatch (org.apache.hadoop.hive.ql.exec.vector.TestVectorizedRowBatch)11 Configuration (org.apache.hadoop.conf.Configuration)10