Search in sources :

Example 31 with StructObjectInspector

use of org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector in project hive by apache.

the class TestLazyBinarySerDe method testShorterSchemaDeserialization1.

/**
   * Test shorter schema deserialization where a bigger struct is serialized and
   * it is then deserialized with a smaller struct. Here the serialized struct
   * has 9 fields and we deserialized to a struct of 8 fields.
   */
private void testShorterSchemaDeserialization1(Random r) throws Throwable {
    StructObjectInspector rowOI1 = (StructObjectInspector) ObjectInspectorFactory.getReflectionObjectInspector(MyTestClass.class, ObjectInspectorOptions.JAVA);
    String fieldNames1 = ObjectInspectorUtils.getFieldNames(rowOI1);
    String fieldTypes1 = ObjectInspectorUtils.getFieldTypes(rowOI1);
    AbstractSerDe serde1 = getSerDe(fieldNames1, fieldTypes1);
    serde1.getObjectInspector();
    StructObjectInspector rowOI2 = (StructObjectInspector) ObjectInspectorFactory.getReflectionObjectInspector(MyTestClassSmaller.class, ObjectInspectorOptions.JAVA);
    String fieldNames2 = ObjectInspectorUtils.getFieldNames(rowOI2);
    String fieldTypes2 = ObjectInspectorUtils.getFieldTypes(rowOI2);
    AbstractSerDe serde2 = getSerDe(fieldNames2, fieldTypes2);
    ObjectInspector serdeOI2 = serde2.getObjectInspector();
    int num = 100;
    for (int itest = 0; itest < num; itest++) {
        MyTestClass t = new MyTestClass();
        ExtraTypeInfo extraTypeInfo = new ExtraTypeInfo();
        t.randomFill(r, extraTypeInfo);
        BytesWritable bw = (BytesWritable) serde1.serialize(t, rowOI1);
        Object output = serde2.deserialize(bw);
        if (0 != compareDiffSizedStructs(t, rowOI1, output, serdeOI2)) {
            System.out.println("structs      = " + SerDeUtils.getJSONString(t, rowOI1));
            System.out.println("deserialized = " + SerDeUtils.getJSONString(output, serdeOI2));
            System.out.println("serialized   = " + TestBinarySortableSerDe.hexString(bw));
            assertEquals(t, output);
        }
    }
}
Also used : AbstractPrimitiveLazyObjectInspector(org.apache.hadoop.hive.serde2.lazy.objectinspector.primitive.AbstractPrimitiveLazyObjectInspector) LazyBinaryMapObjectInspector(org.apache.hadoop.hive.serde2.lazybinary.objectinspector.LazyBinaryMapObjectInspector) WritableBinaryObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableBinaryObjectInspector) ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) MapObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector) JavaBinaryObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.JavaBinaryObjectInspector) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector) MyTestClass(org.apache.hadoop.hive.serde2.binarysortable.MyTestClass) ExtraTypeInfo(org.apache.hadoop.hive.serde2.binarysortable.MyTestPrimitiveClass.ExtraTypeInfo) BytesWritable(org.apache.hadoop.io.BytesWritable) AbstractSerDe(org.apache.hadoop.hive.serde2.AbstractSerDe) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector)

Example 32 with StructObjectInspector

use of org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector in project hive by apache.

the class TestLazySimpleFast method testLazySimpleFast.

private void testLazySimpleFast(SerdeRandomRowSource source, Object[][] rows, LazySimpleSerDe serde, StructObjectInspector rowOI, LazySimpleSerDe serde_fewer, StructObjectInspector writeRowOI, byte separator, LazySerDeParameters serdeParams, LazySerDeParameters serdeParams_fewer, PrimitiveTypeInfo[] primitiveTypeInfos, boolean useIncludeColumns, boolean doWriteFewerColumns, Random r) throws Throwable {
    int rowCount = rows.length;
    int columnCount = primitiveTypeInfos.length;
    boolean[] columnsToInclude = null;
    if (useIncludeColumns) {
        columnsToInclude = new boolean[columnCount];
        for (int i = 0; i < columnCount; i++) {
            columnsToInclude[i] = r.nextBoolean();
        }
    }
    int writeColumnCount = columnCount;
    PrimitiveTypeInfo[] writePrimitiveTypeInfos = primitiveTypeInfos;
    if (doWriteFewerColumns) {
        writeColumnCount = writeRowOI.getAllStructFieldRefs().size();
        writePrimitiveTypeInfos = Arrays.copyOf(primitiveTypeInfos, writeColumnCount);
    }
    // Try to serialize
    BytesWritable[] serializeWriteBytes = new BytesWritable[rowCount];
    for (int i = 0; i < rowCount; i++) {
        Object[] row = rows[i];
        Output output = new Output();
        LazySimpleSerializeWrite lazySimpleSerializeWrite = new LazySimpleSerializeWrite(columnCount, separator, serdeParams);
        lazySimpleSerializeWrite.set(output);
        for (int index = 0; index < columnCount; index++) {
            Writable writable = (Writable) row[index];
            VerifyFast.serializeWrite(lazySimpleSerializeWrite, primitiveTypeInfos[index], writable);
        }
        BytesWritable bytesWritable = new BytesWritable();
        bytesWritable.set(output.getData(), 0, output.getLength());
        serializeWriteBytes[i] = bytesWritable;
    }
    // Try to deserialize
    for (int i = 0; i < rowCount; i++) {
        Object[] row = rows[i];
        LazySimpleDeserializeRead lazySimpleDeserializeRead = new LazySimpleDeserializeRead(writePrimitiveTypeInfos, /* useExternalBuffer */
        false, separator, serdeParams);
        BytesWritable bytesWritable = serializeWriteBytes[i];
        byte[] bytes = bytesWritable.getBytes();
        int length = bytesWritable.getLength();
        lazySimpleDeserializeRead.set(bytes, 0, length);
        char[] chars = new char[length];
        for (int c = 0; c < chars.length; c++) {
            chars[c] = (char) (bytes[c] & 0xFF);
        }
        for (int index = 0; index < columnCount; index++) {
            if (useIncludeColumns && !columnsToInclude[index]) {
                lazySimpleDeserializeRead.skipNextField();
            } else if (index >= writeColumnCount) {
                // Should come back a null.
                VerifyFast.verifyDeserializeRead(lazySimpleDeserializeRead, primitiveTypeInfos[index], null);
            } else {
                Writable writable = (Writable) row[index];
                VerifyFast.verifyDeserializeRead(lazySimpleDeserializeRead, primitiveTypeInfos[index], writable);
            }
        }
        if (writeColumnCount == columnCount) {
            TestCase.assertTrue(lazySimpleDeserializeRead.isEndOfInputReached());
        }
    }
    // Try to deserialize using SerDe class our Writable row objects created by SerializeWrite.
    for (int i = 0; i < rowCount; i++) {
        BytesWritable bytesWritable = serializeWriteBytes[i];
        LazyStruct lazySimpleStruct = (LazyStruct) serde.deserialize(bytesWritable);
        Object[] row = rows[i];
        for (int index = 0; index < columnCount; index++) {
            PrimitiveTypeInfo primitiveTypeInfo = primitiveTypeInfos[index];
            Writable writable = (Writable) row[index];
            LazyPrimitive lazyPrimitive = (LazyPrimitive) lazySimpleStruct.getField(index);
            Object object;
            if (lazyPrimitive != null) {
                object = lazyPrimitive.getWritableObject();
            } else {
                object = null;
            }
            if (writable == null || object == null) {
                if (writable != null || object != null) {
                    fail("SerDe deserialized NULL column mismatch");
                }
            } else {
                if (!object.equals(writable)) {
                    fail("SerDe deserialized value does not match");
                }
            }
        }
    }
    // One Writable per row.
    byte[][] serdeBytes = new byte[rowCount][];
    // Serialize using the SerDe, then below deserialize using DeserializeRead.
    Object[] serdeRow = new Object[columnCount];
    for (int i = 0; i < rowCount; i++) {
        Object[] row = rows[i];
        // LazySimple seems to work better with an row object array instead of a Java object...
        for (int index = 0; index < columnCount; index++) {
            serdeRow[index] = row[index];
        }
        Text serialized = (Text) serde.serialize(serdeRow, rowOI);
        byte[] bytes1 = Arrays.copyOfRange(serialized.getBytes(), 0, serialized.getLength());
        byte[] bytes2 = Arrays.copyOfRange(serializeWriteBytes[i].getBytes(), 0, serializeWriteBytes[i].getLength());
        if (!Arrays.equals(bytes1, bytes2)) {
            fail("SerializeWrite and SerDe serialization does not match");
        }
        serdeBytes[i] = copyBytes(serialized);
    }
    // Try to deserialize using DeserializeRead our Writable row objects created by SerDe.
    for (int i = 0; i < rowCount; i++) {
        Object[] row = rows[i];
        LazySimpleDeserializeRead lazySimpleDeserializeRead = new LazySimpleDeserializeRead(writePrimitiveTypeInfos, /* useExternalBuffer */
        false, separator, serdeParams);
        byte[] bytes = serdeBytes[i];
        lazySimpleDeserializeRead.set(bytes, 0, bytes.length);
        for (int index = 0; index < columnCount; index++) {
            if (useIncludeColumns && !columnsToInclude[index]) {
                lazySimpleDeserializeRead.skipNextField();
            } else if (index >= writeColumnCount) {
                // Should come back a null.
                VerifyFast.verifyDeserializeRead(lazySimpleDeserializeRead, primitiveTypeInfos[index], null);
            } else {
                Writable writable = (Writable) row[index];
                VerifyFast.verifyDeserializeRead(lazySimpleDeserializeRead, primitiveTypeInfos[index], writable);
            }
        }
        if (writeColumnCount == columnCount) {
            TestCase.assertTrue(lazySimpleDeserializeRead.isEndOfInputReached());
        }
    }
}
Also used : Writable(org.apache.hadoop.io.Writable) BytesWritable(org.apache.hadoop.io.BytesWritable) LazySimpleDeserializeRead(org.apache.hadoop.hive.serde2.lazy.fast.LazySimpleDeserializeRead) BytesWritable(org.apache.hadoop.io.BytesWritable) Text(org.apache.hadoop.io.Text) LazySimpleSerializeWrite(org.apache.hadoop.hive.serde2.lazy.fast.LazySimpleSerializeWrite) PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo) Output(org.apache.hadoop.hive.serde2.ByteStream.Output)

Example 33 with StructObjectInspector

use of org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector in project hive by apache.

the class TestLazySimpleFast method testLazySimpleFastCase.

public void testLazySimpleFastCase(int caseNum, boolean doNonRandomFill, Random r) throws Throwable {
    SerdeRandomRowSource source = new SerdeRandomRowSource();
    source.init(r);
    int rowCount = 1000;
    Object[][] rows = source.randomRows(rowCount);
    if (doNonRandomFill) {
        MyTestClass.nonRandomRowFill(rows, source.primitiveCategories());
    }
    StructObjectInspector rowStructObjectInspector = source.rowStructObjectInspector();
    PrimitiveTypeInfo[] primitiveTypeInfos = source.primitiveTypeInfos();
    int columnCount = primitiveTypeInfos.length;
    int writeColumnCount = columnCount;
    StructObjectInspector writeRowStructObjectInspector = rowStructObjectInspector;
    boolean doWriteFewerColumns = r.nextBoolean();
    if (doWriteFewerColumns) {
        writeColumnCount = 1 + r.nextInt(columnCount);
        if (writeColumnCount == columnCount) {
            doWriteFewerColumns = false;
        } else {
            writeRowStructObjectInspector = source.partialRowStructObjectInspector(writeColumnCount);
        }
    }
    String fieldNames = ObjectInspectorUtils.getFieldNames(rowStructObjectInspector);
    String fieldTypes = ObjectInspectorUtils.getFieldTypes(rowStructObjectInspector);
    LazySimpleSerDe serde = getSerDe(fieldNames, fieldTypes);
    LazySerDeParameters serdeParams = getSerDeParams(fieldNames, fieldTypes);
    LazySimpleSerDe serde_fewer = null;
    LazySerDeParameters serdeParams_fewer = null;
    if (doWriteFewerColumns) {
        String partialFieldNames = ObjectInspectorUtils.getFieldNames(writeRowStructObjectInspector);
        String partialFieldTypes = ObjectInspectorUtils.getFieldTypes(writeRowStructObjectInspector);
        serde_fewer = getSerDe(fieldNames, fieldTypes);
        serdeParams_fewer = getSerDeParams(partialFieldNames, partialFieldTypes);
    }
    byte separator = (byte) '\t';
    testLazySimpleFast(source, rows, serde, rowStructObjectInspector, serde_fewer, writeRowStructObjectInspector, separator, serdeParams, serdeParams_fewer, primitiveTypeInfos, /* useIncludeColumns */
    false, /* doWriteFewerColumns */
    false, r);
    testLazySimpleFast(source, rows, serde, rowStructObjectInspector, serde_fewer, writeRowStructObjectInspector, separator, serdeParams, serdeParams_fewer, primitiveTypeInfos, /* useIncludeColumns */
    true, /* doWriteFewerColumns */
    false, r);
    if (doWriteFewerColumns) {
        testLazySimpleFast(source, rows, serde, rowStructObjectInspector, serde_fewer, writeRowStructObjectInspector, separator, serdeParams, serdeParams_fewer, primitiveTypeInfos, /* useIncludeColumns */
        false, /* doWriteFewerColumns */
        true, r);
        testLazySimpleFast(source, rows, serde, rowStructObjectInspector, serde_fewer, writeRowStructObjectInspector, separator, serdeParams, serdeParams_fewer, primitiveTypeInfos, /* useIncludeColumns */
        true, /* doWriteFewerColumns */
        true, r);
    }
}
Also used : SerdeRandomRowSource(org.apache.hadoop.hive.serde2.SerdeRandomRowSource) PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector)

Example 34 with StructObjectInspector

use of org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector in project hive by apache.

the class TestLazySimpleSerDe method testSerDeParameters.

/**
   * Tests the deprecated usage of SerDeParameters.
   *
   */
@Test
@SuppressWarnings("deprecation")
public void testSerDeParameters() throws SerDeException, IOException {
    // Setup
    LazySimpleSerDe serDe = new LazySimpleSerDe();
    Configuration conf = new Configuration();
    MyTestClass row = new MyTestClass();
    ExtraTypeInfo extraTypeInfo = new ExtraTypeInfo();
    row.randomFill(new Random(1234), extraTypeInfo);
    StructObjectInspector rowOI = (StructObjectInspector) ObjectInspectorFactory.getReflectionObjectInspector(MyTestClass.class, ObjectInspectorOptions.JAVA);
    String fieldNames = ObjectInspectorUtils.getFieldNames(rowOI);
    String fieldTypes = ObjectInspectorUtils.getFieldTypes(rowOI);
    Properties schema = new Properties();
    schema.setProperty(serdeConstants.LIST_COLUMNS, fieldNames);
    schema.setProperty(serdeConstants.LIST_COLUMN_TYPES, fieldTypes);
    SerDeUtils.initializeSerDe(serDe, conf, schema, null);
    SerDeParameters serdeParams = LazySimpleSerDe.initSerdeParams(conf, schema, "testSerdeName");
    // Test
    LazyStruct data = (LazyStruct) serializeAndDeserialize(row, rowOI, serDe, serdeParams);
    assertEquals((boolean) row.myBool, ((LazyBoolean) data.getField(0)).getWritableObject().get());
    assertEquals((int) row.myInt, ((LazyInteger) data.getField(3)).getWritableObject().get());
}
Also used : Configuration(org.apache.hadoop.conf.Configuration) Random(java.util.Random) MyTestClass(org.apache.hadoop.hive.serde2.binarysortable.MyTestClass) ExtraTypeInfo(org.apache.hadoop.hive.serde2.binarysortable.MyTestPrimitiveClass.ExtraTypeInfo) SerDeParameters(org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe.SerDeParameters) Properties(java.util.Properties) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector) Test(org.junit.Test)

Example 35 with StructObjectInspector

use of org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector in project hive by apache.

the class TestLazySimpleSerDe method serializeAndDeserialize.

private Object serializeAndDeserialize(Object row, StructObjectInspector rowOI, LazySimpleSerDe serde, LazySerDeParameters serdeParams) throws IOException, SerDeException {
    ByteStream.Output serializeStream = new ByteStream.Output();
    LazySimpleSerDe.serialize(serializeStream, row, rowOI, serdeParams.getSeparators(), 0, serdeParams.getNullSequence(), serdeParams.isEscaped(), serdeParams.getEscapeChar(), serdeParams.getNeedsEscape());
    Text t = new Text(serializeStream.toByteArray());
    return serde.deserialize(t);
}
Also used : ByteStream(org.apache.hadoop.hive.serde2.ByteStream) Text(org.apache.hadoop.io.Text)

Aggregations

StructObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector)232 ObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector)113 ArrayList (java.util.ArrayList)84 StructField (org.apache.hadoop.hive.serde2.objectinspector.StructField)69 PrimitiveObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector)46 SerDeException (org.apache.hadoop.hive.serde2.SerDeException)42 ListObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector)42 MapObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector)40 Test (org.junit.Test)38 Properties (java.util.Properties)35 Text (org.apache.hadoop.io.Text)32 StringObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector)30 Path (org.apache.hadoop.fs.Path)29 HiveException (org.apache.hadoop.hive.ql.metadata.HiveException)27 IOException (java.io.IOException)25 Configuration (org.apache.hadoop.conf.Configuration)25 IntObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.primitive.IntObjectInspector)24 LongObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.primitive.LongObjectInspector)24 TypeInfo (org.apache.hadoop.hive.serde2.typeinfo.TypeInfo)23 InputSplit (org.apache.hadoop.mapred.InputSplit)23