Search in sources :

Example 26 with StructObjectInspector

use of org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector in project hive by apache.

the class TestCrossMapEqualComparer method serializeAndDeserialize.

Object serializeAndDeserialize(TextStringMapHolder o1, StructObjectInspector oi1, LazySimpleSerDe serde, LazySerDeParameters serdeParams) throws IOException, SerDeException {
    ByteStream.Output serializeStream = new ByteStream.Output();
    LazySimpleSerDe.serialize(serializeStream, o1, oi1, serdeParams.getSeparators(), 0, serdeParams.getNullSequence(), serdeParams.isEscaped(), serdeParams.getEscapeChar(), serdeParams.getNeedsEscape());
    Text t = new Text(serializeStream.toByteArray());
    return serde.deserialize(t);
}
Also used : ByteStream(org.apache.hadoop.hive.serde2.ByteStream) Text(org.apache.hadoop.io.Text)

Example 27 with StructObjectInspector

use of org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector in project hive by apache.

the class TestObjectInspectorUtils method testObjectInspectorUtils.

public void testObjectInspectorUtils() throws Throwable {
    try {
        ObjectInspector oi1 = ObjectInspectorFactory.getReflectionObjectInspector(Complex.class, ObjectInspectorFactory.ObjectInspectorOptions.THRIFT);
        // metadata
        assertEquals(Category.STRUCT, oi1.getCategory());
        // standard ObjectInspector
        StructObjectInspector soi = (StructObjectInspector) ObjectInspectorUtils.getStandardObjectInspector(oi1);
        List<? extends StructField> fields = soi.getAllStructFieldRefs();
        assertEquals(10, fields.size());
        assertEquals(fields.get(0), soi.getStructFieldRef("aint"));
        // null
        for (int i = 0; i < fields.size(); i++) {
            assertNull(soi.getStructFieldData(null, fields.get(i)));
        }
        // real object
        Complex cc = new Complex();
        cc.setAint(1);
        cc.setAString("test");
        List<Integer> c2 = Arrays.asList(new Integer[] { 1, 2, 3 });
        cc.setLint(c2);
        List<String> c3 = Arrays.asList(new String[] { "one", "two" });
        cc.setLString(c3);
        List<IntString> c4 = new ArrayList<IntString>();
        cc.setLintString(c4);
        cc.setMStringString(null);
        // standard object
        Object c = ObjectInspectorUtils.copyToStandardObject(cc, oi1);
        assertEquals(1, soi.getStructFieldData(c, fields.get(0)));
        assertEquals("test", soi.getStructFieldData(c, fields.get(1)));
        assertEquals(c2, soi.getStructFieldData(c, fields.get(2)));
        assertEquals(c3, soi.getStructFieldData(c, fields.get(3)));
        assertEquals(c4, soi.getStructFieldData(c, fields.get(4)));
        assertNull(soi.getStructFieldData(c, fields.get(5)));
        ArrayList<Object> cfields = new ArrayList<Object>();
        for (int i = 0; i < 10; i++) {
            cfields.add(soi.getStructFieldData(c, fields.get(i)));
        }
        assertEquals(cfields, soi.getStructFieldsDataAsList(c));
        // sub fields
        assertEquals(PrimitiveObjectInspectorFactory.javaIntObjectInspector, fields.get(0).getFieldObjectInspector());
        assertEquals(PrimitiveObjectInspectorFactory.javaStringObjectInspector, fields.get(1).getFieldObjectInspector());
        assertEquals(ObjectInspectorFactory.getStandardListObjectInspector(PrimitiveObjectInspectorFactory.javaIntObjectInspector), fields.get(2).getFieldObjectInspector());
        assertEquals(ObjectInspectorFactory.getStandardListObjectInspector(PrimitiveObjectInspectorFactory.javaStringObjectInspector), fields.get(3).getFieldObjectInspector());
        assertEquals(ObjectInspectorUtils.getStandardObjectInspector(ObjectInspectorFactory.getStandardListObjectInspector(ObjectInspectorFactory.getReflectionObjectInspector(IntString.class, ObjectInspectorFactory.ObjectInspectorOptions.THRIFT))), fields.get(4).getFieldObjectInspector());
        assertEquals(ObjectInspectorFactory.getStandardMapObjectInspector(PrimitiveObjectInspectorFactory.javaStringObjectInspector, PrimitiveObjectInspectorFactory.javaStringObjectInspector), fields.get(5).getFieldObjectInspector());
    } catch (Throwable e) {
        e.printStackTrace();
        throw e;
    }
}
Also used : ArrayList(java.util.ArrayList) IntString(org.apache.hadoop.hive.serde2.thrift.test.IntString) Complex(org.apache.hadoop.hive.serde2.thrift.test.Complex) IntString(org.apache.hadoop.hive.serde2.thrift.test.IntString)

Example 28 with StructObjectInspector

use of org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector in project hive by apache.

the class TestProtocolBuffersObjectInspectors method testProtocolBuffersObjectInspectors.

public void testProtocolBuffersObjectInspectors() throws Throwable {
    try {
        ObjectInspector oi1 = ObjectInspectorFactory.getReflectionObjectInspector(Complex.class, ObjectInspectorFactory.ObjectInspectorOptions.PROTOCOL_BUFFERS);
        ObjectInspector oi2 = ObjectInspectorFactory.getReflectionObjectInspector(Complex.class, ObjectInspectorFactory.ObjectInspectorOptions.PROTOCOL_BUFFERS);
        assertEquals(oi1, oi2);
        // metadata
        assertEquals(Category.STRUCT, oi1.getCategory());
        StructObjectInspector soi = (StructObjectInspector) oi1;
        List<? extends StructField> fields = soi.getAllStructFieldRefs();
        assertEquals(5, fields.size());
        assertEquals(fields.get(0), soi.getStructFieldRef("aint_"));
        // null
        for (int i = 0; i < fields.size(); i++) {
            assertNull(soi.getStructFieldData(null, fields.get(i)));
        }
        // real object
        List<Integer> c2 = Arrays.asList(new Integer[] { 1, 2, 3 });
        List<String> c3 = Arrays.asList(new String[] { "one", "two" });
        List<IntString> c4 = new ArrayList<IntString>();
        Complex c = new Complex(1, "test", c2, c3, c4);
        assertEquals(1, soi.getStructFieldData(c, fields.get(0)));
        assertEquals("test", soi.getStructFieldData(c, fields.get(1)));
        assertEquals(c2, soi.getStructFieldData(c, fields.get(2)));
        assertEquals(c3, soi.getStructFieldData(c, fields.get(3)));
        assertEquals(c4, soi.getStructFieldData(c, fields.get(4)));
        ArrayList<Object> cfields = new ArrayList<Object>();
        for (int i = 0; i < 5; i++) {
            cfields.add(soi.getStructFieldData(c, fields.get(i)));
        }
        assertEquals(cfields, soi.getStructFieldsDataAsList(c));
        // sub fields
        assertEquals(PrimitiveObjectInspectorFactory.javaIntObjectInspector, fields.get(0).getFieldObjectInspector());
        assertEquals(PrimitiveObjectInspectorFactory.javaStringObjectInspector, fields.get(1).getFieldObjectInspector());
        assertEquals(ObjectInspectorFactory.getStandardListObjectInspector(PrimitiveObjectInspectorFactory.javaIntObjectInspector), fields.get(2).getFieldObjectInspector());
        assertEquals(ObjectInspectorFactory.getStandardListObjectInspector(PrimitiveObjectInspectorFactory.javaStringObjectInspector), fields.get(3).getFieldObjectInspector());
        assertEquals(ObjectInspectorFactory.getStandardListObjectInspector(ObjectInspectorFactory.getReflectionObjectInspector(IntString.class, ObjectInspectorFactory.ObjectInspectorOptions.PROTOCOL_BUFFERS)), fields.get(4).getFieldObjectInspector());
    } catch (Throwable e) {
        e.printStackTrace();
        throw e;
    }
}
Also used : ArrayList(java.util.ArrayList) IntString(org.apache.hadoop.hive.serde2.proto.test.Complexpb.IntString) Complex(org.apache.hadoop.hive.serde2.proto.test.Complexpb.Complex) IntString(org.apache.hadoop.hive.serde2.proto.test.Complexpb.IntString)

Example 29 with StructObjectInspector

use of org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector in project hive by apache.

the class TestLazyBinaryFast method testLazyBinaryFastCase.

public void testLazyBinaryFastCase(int caseNum, boolean doNonRandomFill, Random r) throws Throwable {
    SerdeRandomRowSource source = new SerdeRandomRowSource();
    source.init(r);
    int rowCount = 1000;
    Object[][] rows = source.randomRows(rowCount);
    if (doNonRandomFill) {
        MyTestClass.nonRandomRowFill(rows, source.primitiveCategories());
    }
    StructObjectInspector rowStructObjectInspector = source.rowStructObjectInspector();
    PrimitiveTypeInfo[] primitiveTypeInfos = source.primitiveTypeInfos();
    int columnCount = primitiveTypeInfos.length;
    int writeColumnCount = columnCount;
    StructObjectInspector writeRowStructObjectInspector = rowStructObjectInspector;
    boolean doWriteFewerColumns = r.nextBoolean();
    if (doWriteFewerColumns) {
        writeColumnCount = 1 + r.nextInt(columnCount);
        if (writeColumnCount == columnCount) {
            doWriteFewerColumns = false;
        } else {
            writeRowStructObjectInspector = source.partialRowStructObjectInspector(writeColumnCount);
        }
    }
    String fieldNames = ObjectInspectorUtils.getFieldNames(rowStructObjectInspector);
    String fieldTypes = ObjectInspectorUtils.getFieldTypes(rowStructObjectInspector);
    AbstractSerDe serde = TestLazyBinarySerDe.getSerDe(fieldNames, fieldTypes);
    AbstractSerDe serde_fewer = null;
    if (doWriteFewerColumns) {
        String partialFieldNames = ObjectInspectorUtils.getFieldNames(writeRowStructObjectInspector);
        String partialFieldTypes = ObjectInspectorUtils.getFieldTypes(writeRowStructObjectInspector);
        serde_fewer = TestLazyBinarySerDe.getSerDe(partialFieldNames, partialFieldTypes);
        ;
    }
    testLazyBinaryFast(source, rows, serde, rowStructObjectInspector, serde_fewer, writeRowStructObjectInspector, primitiveTypeInfos, /* useIncludeColumns */
    false, /* doWriteFewerColumns */
    false, r);
    testLazyBinaryFast(source, rows, serde, rowStructObjectInspector, serde_fewer, writeRowStructObjectInspector, primitiveTypeInfos, /* useIncludeColumns */
    true, /* doWriteFewerColumns */
    false, r);
/*
     * Can the LazyBinary format really tolerate writing fewer columns?
     */
// if (doWriteFewerColumns) {
//   testLazyBinaryFast(
//       source, rows,
//       serde, rowStructObjectInspector,
//       serde_fewer, writeRowStructObjectInspector,
//       primitiveTypeInfos,
//       /* useIncludeColumns */ false, /* doWriteFewerColumns */ true, r);
//   testLazyBinaryFast(
//       source, rows,
//       serde, rowStructObjectInspector,
//       serde_fewer, writeRowStructObjectInspector,
//       primitiveTypeInfos,
//       /* useIncludeColumns */ true, /* doWriteFewerColumns */ true, r);
// }
}
Also used : SerdeRandomRowSource(org.apache.hadoop.hive.serde2.SerdeRandomRowSource) PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo) AbstractSerDe(org.apache.hadoop.hive.serde2.AbstractSerDe) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector)

Example 30 with StructObjectInspector

use of org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector in project hive by apache.

the class TestLazyBinaryFast method testLazyBinaryFast.

private void testLazyBinaryFast(SerdeRandomRowSource source, Object[][] rows, AbstractSerDe serde, StructObjectInspector rowOI, AbstractSerDe serde_fewer, StructObjectInspector writeRowOI, PrimitiveTypeInfo[] primitiveTypeInfos, boolean useIncludeColumns, boolean doWriteFewerColumns, Random r) throws Throwable {
    int rowCount = rows.length;
    int columnCount = primitiveTypeInfos.length;
    boolean[] columnsToInclude = null;
    if (useIncludeColumns) {
        columnsToInclude = new boolean[columnCount];
        for (int i = 0; i < columnCount; i++) {
            columnsToInclude[i] = r.nextBoolean();
        }
    }
    int writeColumnCount = columnCount;
    PrimitiveTypeInfo[] writePrimitiveTypeInfos = primitiveTypeInfos;
    if (doWriteFewerColumns) {
        writeColumnCount = writeRowOI.getAllStructFieldRefs().size();
        writePrimitiveTypeInfos = Arrays.copyOf(primitiveTypeInfos, writeColumnCount);
    }
    LazyBinarySerializeWrite lazyBinarySerializeWrite = new LazyBinarySerializeWrite(writeColumnCount);
    // Try to serialize
    BytesWritable[] serializeWriteBytes = new BytesWritable[rowCount];
    for (int i = 0; i < rowCount; i++) {
        Object[] row = rows[i];
        Output output = new Output();
        lazyBinarySerializeWrite.set(output);
        for (int index = 0; index < writeColumnCount; index++) {
            Writable writable = (Writable) row[index];
            VerifyFast.serializeWrite(lazyBinarySerializeWrite, primitiveTypeInfos[index], writable);
        }
        BytesWritable bytesWritable = new BytesWritable();
        bytesWritable.set(output.getData(), 0, output.getLength());
        serializeWriteBytes[i] = bytesWritable;
    }
    // Try to deserialize
    for (int i = 0; i < rowCount; i++) {
        Object[] row = rows[i];
        // Specifying the right type info length tells LazyBinaryDeserializeRead which is the last
        // column.
        LazyBinaryDeserializeRead lazyBinaryDeserializeRead = new LazyBinaryDeserializeRead(writePrimitiveTypeInfos, /* useExternalBuffer */
        false);
        BytesWritable bytesWritable = serializeWriteBytes[i];
        lazyBinaryDeserializeRead.set(bytesWritable.getBytes(), 0, bytesWritable.getLength());
        for (int index = 0; index < columnCount; index++) {
            if (useIncludeColumns && !columnsToInclude[index]) {
                lazyBinaryDeserializeRead.skipNextField();
            } else if (index >= writeColumnCount) {
                // Should come back a null.
                VerifyFast.verifyDeserializeRead(lazyBinaryDeserializeRead, primitiveTypeInfos[index], null);
            } else {
                Writable writable = (Writable) row[index];
                VerifyFast.verifyDeserializeRead(lazyBinaryDeserializeRead, primitiveTypeInfos[index], writable);
            }
        }
        if (writeColumnCount == columnCount) {
            TestCase.assertTrue(lazyBinaryDeserializeRead.isEndOfInputReached());
        }
    }
    // Try to deserialize using SerDe class our Writable row objects created by SerializeWrite.
    for (int i = 0; i < rowCount; i++) {
        BytesWritable bytesWritable = serializeWriteBytes[i];
        LazyBinaryStruct lazyBinaryStruct;
        if (doWriteFewerColumns) {
            lazyBinaryStruct = (LazyBinaryStruct) serde_fewer.deserialize(bytesWritable);
        } else {
            lazyBinaryStruct = (LazyBinaryStruct) serde.deserialize(bytesWritable);
        }
        Object[] row = rows[i];
        for (int index = 0; index < writeColumnCount; index++) {
            PrimitiveTypeInfo primitiveTypeInfo = primitiveTypeInfos[index];
            Writable writable = (Writable) row[index];
            Object object = lazyBinaryStruct.getField(index);
            if (writable == null || object == null) {
                if (writable != null || object != null) {
                    fail("SerDe deserialized NULL column mismatch");
                }
            } else {
                if (!object.equals(writable)) {
                    fail("SerDe deserialized value does not match");
                }
            }
        }
    }
    // One Writable per row.
    BytesWritable[] serdeBytes = new BytesWritable[rowCount];
    // Serialize using the SerDe, then below deserialize using DeserializeRead.
    Object[] serdeRow = new Object[writeColumnCount];
    for (int i = 0; i < rowCount; i++) {
        Object[] row = rows[i];
        // LazyBinary seems to work better with an row object array instead of a Java object...
        for (int index = 0; index < writeColumnCount; index++) {
            serdeRow[index] = row[index];
        }
        BytesWritable serialized;
        if (doWriteFewerColumns) {
            serialized = (BytesWritable) serde_fewer.serialize(serdeRow, writeRowOI);
        } else {
            serialized = (BytesWritable) serde.serialize(serdeRow, rowOI);
        }
        BytesWritable bytesWritable = new BytesWritable(Arrays.copyOfRange(serialized.getBytes(), 0, serialized.getLength()));
        byte[] bytes1 = bytesWritable.getBytes();
        BytesWritable lazySerializedWriteBytes = serializeWriteBytes[i];
        byte[] bytes2 = Arrays.copyOfRange(lazySerializedWriteBytes.getBytes(), 0, lazySerializedWriteBytes.getLength());
        if (bytes1.length != bytes2.length) {
            fail("SerializeWrite length " + bytes2.length + " and " + "SerDe serialization length " + bytes1.length + " do not match (" + Arrays.toString(primitiveTypeInfos) + ")");
        }
        if (!Arrays.equals(bytes1, bytes2)) {
            fail("SerializeWrite and SerDe serialization does not match (" + Arrays.toString(primitiveTypeInfos) + ")");
        }
        serdeBytes[i] = bytesWritable;
    }
    // Try to deserialize using DeserializeRead our Writable row objects created by SerDe.
    for (int i = 0; i < rowCount; i++) {
        Object[] row = rows[i];
        // When doWriteFewerColumns, try to read more fields than exist in buffer.
        LazyBinaryDeserializeRead lazyBinaryDeserializeRead = new LazyBinaryDeserializeRead(primitiveTypeInfos, /* useExternalBuffer */
        false);
        BytesWritable bytesWritable = serdeBytes[i];
        lazyBinaryDeserializeRead.set(bytesWritable.getBytes(), 0, bytesWritable.getLength());
        for (int index = 0; index < columnCount; index++) {
            if (useIncludeColumns && !columnsToInclude[index]) {
                lazyBinaryDeserializeRead.skipNextField();
            } else if (index >= writeColumnCount) {
                // Should come back a null.
                VerifyFast.verifyDeserializeRead(lazyBinaryDeserializeRead, primitiveTypeInfos[index], null);
            } else {
                Writable writable = (Writable) row[index];
                VerifyFast.verifyDeserializeRead(lazyBinaryDeserializeRead, primitiveTypeInfos[index], writable);
            }
        }
        if (writeColumnCount == columnCount) {
            TestCase.assertTrue(lazyBinaryDeserializeRead.isEndOfInputReached());
        }
    }
}
Also used : LazyBinarySerializeWrite(org.apache.hadoop.hive.serde2.lazybinary.fast.LazyBinarySerializeWrite) Writable(org.apache.hadoop.io.Writable) BytesWritable(org.apache.hadoop.io.BytesWritable) BytesWritable(org.apache.hadoop.io.BytesWritable) PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo) Output(org.apache.hadoop.hive.serde2.ByteStream.Output) LazyBinaryDeserializeRead(org.apache.hadoop.hive.serde2.lazybinary.fast.LazyBinaryDeserializeRead)

Aggregations

StructObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector)232 ObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector)113 ArrayList (java.util.ArrayList)84 StructField (org.apache.hadoop.hive.serde2.objectinspector.StructField)69 PrimitiveObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector)46 SerDeException (org.apache.hadoop.hive.serde2.SerDeException)42 ListObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector)42 MapObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector)40 Test (org.junit.Test)38 Properties (java.util.Properties)35 Text (org.apache.hadoop.io.Text)32 StringObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector)30 Path (org.apache.hadoop.fs.Path)29 HiveException (org.apache.hadoop.hive.ql.metadata.HiveException)27 IOException (java.io.IOException)25 Configuration (org.apache.hadoop.conf.Configuration)25 IntObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.primitive.IntObjectInspector)24 LongObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.primitive.LongObjectInspector)24 TypeInfo (org.apache.hadoop.hive.serde2.typeinfo.TypeInfo)23 InputSplit (org.apache.hadoop.mapred.InputSplit)23