Search in sources :

Example 6 with PrimitiveTypeInfo

use of org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo in project hive by apache.

the class AbstractTestGenericUDFOPNumeric method verifyReturnType.

protected void verifyReturnType(GenericUDF udf, String typeStr1, String typeStr2, String expectedTypeStr) throws HiveException {
    // Lookup type infos for our input types and expected return type
    PrimitiveTypeInfo type1 = TypeInfoFactory.getPrimitiveTypeInfo(typeStr1);
    PrimitiveTypeInfo type2 = TypeInfoFactory.getPrimitiveTypeInfo(typeStr2);
    PrimitiveTypeInfo expectedType = TypeInfoFactory.getPrimitiveTypeInfo(expectedTypeStr);
    // Initialize UDF which will output the return type for the UDF.
    ObjectInspector[] inputOIs = { PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(type1), PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(type2) };
    PrimitiveObjectInspector oi = (PrimitiveObjectInspector) udf.initialize(inputOIs);
    Assert.assertEquals("Return type for " + udf.getDisplayString(new String[] { typeStr1, typeStr2 }), expectedType, oi.getTypeInfo());
}
Also used : PrimitiveObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector) ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) PrimitiveObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector) PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo)

Example 7 with PrimitiveTypeInfo

use of org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo in project hive by apache.

the class SerdeRandomRowSource method chooseSchema.

private void chooseSchema() {
    HashSet hashSet = null;
    boolean allTypes;
    boolean onlyOne = (r.nextInt(100) == 7);
    if (onlyOne) {
        columnCount = 1;
        allTypes = false;
    } else {
        allTypes = r.nextBoolean();
        if (allTypes) {
            // One of each type.
            columnCount = possibleHiveTypeNames.length;
            hashSet = new HashSet<Integer>();
        } else {
            columnCount = 1 + r.nextInt(20);
        }
    }
    typeNames = new ArrayList<String>(columnCount);
    primitiveCategories = new PrimitiveCategory[columnCount];
    primitiveTypeInfos = new PrimitiveTypeInfo[columnCount];
    primitiveObjectInspectorList = new ArrayList<ObjectInspector>(columnCount);
    List<String> columnNames = new ArrayList<String>(columnCount);
    for (int c = 0; c < columnCount; c++) {
        columnNames.add(String.format("col%d", c));
        String typeName;
        if (onlyOne) {
            typeName = possibleHiveTypeNames[r.nextInt(possibleHiveTypeNames.length)];
        } else {
            int typeNum;
            if (allTypes) {
                while (true) {
                    typeNum = r.nextInt(possibleHiveTypeNames.length);
                    Integer typeNumInteger = new Integer(typeNum);
                    if (!hashSet.contains(typeNumInteger)) {
                        hashSet.add(typeNumInteger);
                        break;
                    }
                }
            } else {
                typeNum = r.nextInt(possibleHiveTypeNames.length);
            }
            typeName = possibleHiveTypeNames[typeNum];
        }
        if (typeName.equals("char")) {
            int maxLength = 1 + r.nextInt(100);
            typeName = String.format("char(%d)", maxLength);
        } else if (typeName.equals("varchar")) {
            int maxLength = 1 + r.nextInt(100);
            typeName = String.format("varchar(%d)", maxLength);
        } else if (typeName.equals("decimal")) {
            typeName = String.format("decimal(%d,%d)", HiveDecimal.SYSTEM_DEFAULT_PRECISION, HiveDecimal.SYSTEM_DEFAULT_SCALE);
        }
        PrimitiveTypeInfo primitiveTypeInfo = (PrimitiveTypeInfo) TypeInfoUtils.getTypeInfoFromTypeString(typeName);
        primitiveTypeInfos[c] = primitiveTypeInfo;
        PrimitiveCategory primitiveCategory = primitiveTypeInfo.getPrimitiveCategory();
        primitiveCategories[c] = primitiveCategory;
        primitiveObjectInspectorList.add(PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(primitiveTypeInfo));
        typeNames.add(typeName);
    }
    rowStructObjectInspector = ObjectInspectorFactory.getStandardStructObjectInspector(columnNames, primitiveObjectInspectorList);
}
Also used : WritableIntObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableIntObjectInspector) WritableByteObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableByteObjectInspector) WritableHiveCharObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableHiveCharObjectInspector) WritableHiveVarcharObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableHiveVarcharObjectInspector) WritableBooleanObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableBooleanObjectInspector) WritableTimestampObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableTimestampObjectInspector) WritableHiveIntervalDayTimeObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableHiveIntervalDayTimeObjectInspector) WritableShortObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableShortObjectInspector) WritableDoubleObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableDoubleObjectInspector) ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) WritableHiveIntervalYearMonthObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableHiveIntervalYearMonthObjectInspector) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector) WritableFloatObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableFloatObjectInspector) WritableLongObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableLongObjectInspector) WritableDateObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableDateObjectInspector) WritableHiveDecimalObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableHiveDecimalObjectInspector) WritableStringObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableStringObjectInspector) ArrayList(java.util.ArrayList) PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo) PrimitiveCategory(org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory) HashSet(java.util.HashSet)

Example 8 with PrimitiveTypeInfo

use of org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo in project hive by apache.

the class TestObjectInspectorConverters method testGetConvertedOI.

public void testGetConvertedOI() throws Throwable {
    // Try with types that have type params
    PrimitiveTypeInfo varchar5TI = (PrimitiveTypeInfo) TypeInfoFactory.getPrimitiveTypeInfo("varchar(5)");
    PrimitiveTypeInfo varchar10TI = (PrimitiveTypeInfo) TypeInfoFactory.getPrimitiveTypeInfo("varchar(10)");
    PrimitiveObjectInspector varchar5OI = PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(varchar5TI);
    PrimitiveObjectInspector varchar10OI = PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(varchar10TI);
    // output OI should have varchar type params
    PrimitiveObjectInspector poi = (PrimitiveObjectInspector) ObjectInspectorConverters.getConvertedOI(varchar10OI, varchar5OI);
    VarcharTypeInfo vcParams = (VarcharTypeInfo) poi.getTypeInfo();
    assertEquals("varchar length doesn't match", 5, vcParams.getLength());
}
Also used : VarcharTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo) PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo)

Example 9 with PrimitiveTypeInfo

use of org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo in project hive by apache.

the class TestLazyBinaryFast method testLazyBinaryFastCase.

public void testLazyBinaryFastCase(int caseNum, boolean doNonRandomFill, Random r) throws Throwable {
    SerdeRandomRowSource source = new SerdeRandomRowSource();
    source.init(r);
    int rowCount = 1000;
    Object[][] rows = source.randomRows(rowCount);
    if (doNonRandomFill) {
        MyTestClass.nonRandomRowFill(rows, source.primitiveCategories());
    }
    StructObjectInspector rowStructObjectInspector = source.rowStructObjectInspector();
    PrimitiveTypeInfo[] primitiveTypeInfos = source.primitiveTypeInfos();
    int columnCount = primitiveTypeInfos.length;
    int writeColumnCount = columnCount;
    StructObjectInspector writeRowStructObjectInspector = rowStructObjectInspector;
    boolean doWriteFewerColumns = r.nextBoolean();
    if (doWriteFewerColumns) {
        writeColumnCount = 1 + r.nextInt(columnCount);
        if (writeColumnCount == columnCount) {
            doWriteFewerColumns = false;
        } else {
            writeRowStructObjectInspector = source.partialRowStructObjectInspector(writeColumnCount);
        }
    }
    String fieldNames = ObjectInspectorUtils.getFieldNames(rowStructObjectInspector);
    String fieldTypes = ObjectInspectorUtils.getFieldTypes(rowStructObjectInspector);
    AbstractSerDe serde = TestLazyBinarySerDe.getSerDe(fieldNames, fieldTypes);
    AbstractSerDe serde_fewer = null;
    if (doWriteFewerColumns) {
        String partialFieldNames = ObjectInspectorUtils.getFieldNames(writeRowStructObjectInspector);
        String partialFieldTypes = ObjectInspectorUtils.getFieldTypes(writeRowStructObjectInspector);
        serde_fewer = TestLazyBinarySerDe.getSerDe(partialFieldNames, partialFieldTypes);
        ;
    }
    testLazyBinaryFast(source, rows, serde, rowStructObjectInspector, serde_fewer, writeRowStructObjectInspector, primitiveTypeInfos, /* useIncludeColumns */
    false, /* doWriteFewerColumns */
    false, r);
    testLazyBinaryFast(source, rows, serde, rowStructObjectInspector, serde_fewer, writeRowStructObjectInspector, primitiveTypeInfos, /* useIncludeColumns */
    true, /* doWriteFewerColumns */
    false, r);
/*
     * Can the LazyBinary format really tolerate writing fewer columns?
     */
// if (doWriteFewerColumns) {
//   testLazyBinaryFast(
//       source, rows,
//       serde, rowStructObjectInspector,
//       serde_fewer, writeRowStructObjectInspector,
//       primitiveTypeInfos,
//       /* useIncludeColumns */ false, /* doWriteFewerColumns */ true, r);
//   testLazyBinaryFast(
//       source, rows,
//       serde, rowStructObjectInspector,
//       serde_fewer, writeRowStructObjectInspector,
//       primitiveTypeInfos,
//       /* useIncludeColumns */ true, /* doWriteFewerColumns */ true, r);
// }
}
Also used : SerdeRandomRowSource(org.apache.hadoop.hive.serde2.SerdeRandomRowSource) PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo) AbstractSerDe(org.apache.hadoop.hive.serde2.AbstractSerDe) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector)

Example 10 with PrimitiveTypeInfo

use of org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo in project hive by apache.

the class TestLazyBinaryFast method testLazyBinaryFast.

private void testLazyBinaryFast(SerdeRandomRowSource source, Object[][] rows, AbstractSerDe serde, StructObjectInspector rowOI, AbstractSerDe serde_fewer, StructObjectInspector writeRowOI, PrimitiveTypeInfo[] primitiveTypeInfos, boolean useIncludeColumns, boolean doWriteFewerColumns, Random r) throws Throwable {
    int rowCount = rows.length;
    int columnCount = primitiveTypeInfos.length;
    boolean[] columnsToInclude = null;
    if (useIncludeColumns) {
        columnsToInclude = new boolean[columnCount];
        for (int i = 0; i < columnCount; i++) {
            columnsToInclude[i] = r.nextBoolean();
        }
    }
    int writeColumnCount = columnCount;
    PrimitiveTypeInfo[] writePrimitiveTypeInfos = primitiveTypeInfos;
    if (doWriteFewerColumns) {
        writeColumnCount = writeRowOI.getAllStructFieldRefs().size();
        writePrimitiveTypeInfos = Arrays.copyOf(primitiveTypeInfos, writeColumnCount);
    }
    LazyBinarySerializeWrite lazyBinarySerializeWrite = new LazyBinarySerializeWrite(writeColumnCount);
    // Try to serialize
    BytesWritable[] serializeWriteBytes = new BytesWritable[rowCount];
    for (int i = 0; i < rowCount; i++) {
        Object[] row = rows[i];
        Output output = new Output();
        lazyBinarySerializeWrite.set(output);
        for (int index = 0; index < writeColumnCount; index++) {
            Writable writable = (Writable) row[index];
            VerifyFast.serializeWrite(lazyBinarySerializeWrite, primitiveTypeInfos[index], writable);
        }
        BytesWritable bytesWritable = new BytesWritable();
        bytesWritable.set(output.getData(), 0, output.getLength());
        serializeWriteBytes[i] = bytesWritable;
    }
    // Try to deserialize
    for (int i = 0; i < rowCount; i++) {
        Object[] row = rows[i];
        // Specifying the right type info length tells LazyBinaryDeserializeRead which is the last
        // column.
        LazyBinaryDeserializeRead lazyBinaryDeserializeRead = new LazyBinaryDeserializeRead(writePrimitiveTypeInfos, /* useExternalBuffer */
        false);
        BytesWritable bytesWritable = serializeWriteBytes[i];
        lazyBinaryDeserializeRead.set(bytesWritable.getBytes(), 0, bytesWritable.getLength());
        for (int index = 0; index < columnCount; index++) {
            if (useIncludeColumns && !columnsToInclude[index]) {
                lazyBinaryDeserializeRead.skipNextField();
            } else if (index >= writeColumnCount) {
                // Should come back a null.
                VerifyFast.verifyDeserializeRead(lazyBinaryDeserializeRead, primitiveTypeInfos[index], null);
            } else {
                Writable writable = (Writable) row[index];
                VerifyFast.verifyDeserializeRead(lazyBinaryDeserializeRead, primitiveTypeInfos[index], writable);
            }
        }
        if (writeColumnCount == columnCount) {
            TestCase.assertTrue(lazyBinaryDeserializeRead.isEndOfInputReached());
        }
    }
    // Try to deserialize using SerDe class our Writable row objects created by SerializeWrite.
    for (int i = 0; i < rowCount; i++) {
        BytesWritable bytesWritable = serializeWriteBytes[i];
        LazyBinaryStruct lazyBinaryStruct;
        if (doWriteFewerColumns) {
            lazyBinaryStruct = (LazyBinaryStruct) serde_fewer.deserialize(bytesWritable);
        } else {
            lazyBinaryStruct = (LazyBinaryStruct) serde.deserialize(bytesWritable);
        }
        Object[] row = rows[i];
        for (int index = 0; index < writeColumnCount; index++) {
            PrimitiveTypeInfo primitiveTypeInfo = primitiveTypeInfos[index];
            Writable writable = (Writable) row[index];
            Object object = lazyBinaryStruct.getField(index);
            if (writable == null || object == null) {
                if (writable != null || object != null) {
                    fail("SerDe deserialized NULL column mismatch");
                }
            } else {
                if (!object.equals(writable)) {
                    fail("SerDe deserialized value does not match");
                }
            }
        }
    }
    // One Writable per row.
    BytesWritable[] serdeBytes = new BytesWritable[rowCount];
    // Serialize using the SerDe, then below deserialize using DeserializeRead.
    Object[] serdeRow = new Object[writeColumnCount];
    for (int i = 0; i < rowCount; i++) {
        Object[] row = rows[i];
        // LazyBinary seems to work better with an row object array instead of a Java object...
        for (int index = 0; index < writeColumnCount; index++) {
            serdeRow[index] = row[index];
        }
        BytesWritable serialized;
        if (doWriteFewerColumns) {
            serialized = (BytesWritable) serde_fewer.serialize(serdeRow, writeRowOI);
        } else {
            serialized = (BytesWritable) serde.serialize(serdeRow, rowOI);
        }
        BytesWritable bytesWritable = new BytesWritable(Arrays.copyOfRange(serialized.getBytes(), 0, serialized.getLength()));
        byte[] bytes1 = bytesWritable.getBytes();
        BytesWritable lazySerializedWriteBytes = serializeWriteBytes[i];
        byte[] bytes2 = Arrays.copyOfRange(lazySerializedWriteBytes.getBytes(), 0, lazySerializedWriteBytes.getLength());
        if (bytes1.length != bytes2.length) {
            fail("SerializeWrite length " + bytes2.length + " and " + "SerDe serialization length " + bytes1.length + " do not match (" + Arrays.toString(primitiveTypeInfos) + ")");
        }
        if (!Arrays.equals(bytes1, bytes2)) {
            fail("SerializeWrite and SerDe serialization does not match (" + Arrays.toString(primitiveTypeInfos) + ")");
        }
        serdeBytes[i] = bytesWritable;
    }
    // Try to deserialize using DeserializeRead our Writable row objects created by SerDe.
    for (int i = 0; i < rowCount; i++) {
        Object[] row = rows[i];
        // When doWriteFewerColumns, try to read more fields than exist in buffer.
        LazyBinaryDeserializeRead lazyBinaryDeserializeRead = new LazyBinaryDeserializeRead(primitiveTypeInfos, /* useExternalBuffer */
        false);
        BytesWritable bytesWritable = serdeBytes[i];
        lazyBinaryDeserializeRead.set(bytesWritable.getBytes(), 0, bytesWritable.getLength());
        for (int index = 0; index < columnCount; index++) {
            if (useIncludeColumns && !columnsToInclude[index]) {
                lazyBinaryDeserializeRead.skipNextField();
            } else if (index >= writeColumnCount) {
                // Should come back a null.
                VerifyFast.verifyDeserializeRead(lazyBinaryDeserializeRead, primitiveTypeInfos[index], null);
            } else {
                Writable writable = (Writable) row[index];
                VerifyFast.verifyDeserializeRead(lazyBinaryDeserializeRead, primitiveTypeInfos[index], writable);
            }
        }
        if (writeColumnCount == columnCount) {
            TestCase.assertTrue(lazyBinaryDeserializeRead.isEndOfInputReached());
        }
    }
}
Also used : LazyBinarySerializeWrite(org.apache.hadoop.hive.serde2.lazybinary.fast.LazyBinarySerializeWrite) Writable(org.apache.hadoop.io.Writable) BytesWritable(org.apache.hadoop.io.BytesWritable) BytesWritable(org.apache.hadoop.io.BytesWritable) PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo) Output(org.apache.hadoop.hive.serde2.ByteStream.Output) LazyBinaryDeserializeRead(org.apache.hadoop.hive.serde2.lazybinary.fast.LazyBinaryDeserializeRead)

Aggregations

PrimitiveTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo)110 TypeInfo (org.apache.hadoop.hive.serde2.typeinfo.TypeInfo)41 ArrayList (java.util.ArrayList)37 PrimitiveCategory (org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory)33 StructTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo)26 DecimalTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo)25 ObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector)23 HiveException (org.apache.hadoop.hive.ql.metadata.HiveException)20 ListTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo)19 StructObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector)18 MapTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo)18 HiveDecimal (org.apache.hadoop.hive.common.type.HiveDecimal)15 ExprNodeDesc (org.apache.hadoop.hive.ql.plan.ExprNodeDesc)15 BytesWritable (org.apache.hadoop.io.BytesWritable)15 CharTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo)14 VarcharTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo)14 IntWritable (org.apache.hadoop.io.IntWritable)13 Text (org.apache.hadoop.io.Text)13 Category (org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category)11 BooleanWritable (org.apache.hadoop.io.BooleanWritable)11