Search in sources :

Example 11 with SerdeRandomRowSource

use of org.apache.hadoop.hive.serde2.SerdeRandomRowSource in project hive by apache.

the class TestLazySimpleFast method testLazySimpleFast.

private void testLazySimpleFast(SerdeRandomRowSource source, Object[][] rows, LazySimpleSerDe serde, StructObjectInspector rowOI, LazySimpleSerDe serde_fewer, StructObjectInspector writeRowOI, LazySerDeParameters serdeParams, LazySerDeParameters serdeParams_fewer, TypeInfo[] typeInfos, boolean useIncludeColumns, boolean doWriteFewerColumns, Random r) throws Throwable {
    int rowCount = rows.length;
    int columnCount = typeInfos.length;
    boolean[] columnsToInclude = null;
    if (useIncludeColumns) {
        columnsToInclude = new boolean[columnCount];
        for (int i = 0; i < columnCount; i++) {
            columnsToInclude[i] = r.nextBoolean();
        }
    }
    int writeColumnCount = columnCount;
    TypeInfo[] writeTypeInfos = typeInfos;
    if (doWriteFewerColumns) {
        writeColumnCount = writeRowOI.getAllStructFieldRefs().size();
        writeTypeInfos = Arrays.copyOf(typeInfos, writeColumnCount);
    }
    // Try to serialize
    BytesWritable[] serializeWriteBytes = new BytesWritable[rowCount];
    for (int i = 0; i < rowCount; i++) {
        Object[] row = rows[i];
        Output output = new Output();
        LazySimpleSerializeWrite lazySimpleSerializeWrite = new LazySimpleSerializeWrite(columnCount, serdeParams);
        lazySimpleSerializeWrite.set(output);
        for (int index = 0; index < columnCount; index++) {
            VerifyFast.serializeWrite(lazySimpleSerializeWrite, typeInfos[index], row[index]);
        }
        BytesWritable bytesWritable = new BytesWritable();
        bytesWritable.set(output.getData(), 0, output.getLength());
        serializeWriteBytes[i] = bytesWritable;
    }
    // Try to deserialize
    for (int i = 0; i < rowCount; i++) {
        Object[] row = rows[i];
        LazySimpleDeserializeRead lazySimpleDeserializeRead = new LazySimpleDeserializeRead(writeTypeInfos, /* useExternalBuffer */
        false, serdeParams);
        BytesWritable bytesWritable = serializeWriteBytes[i];
        byte[] bytes = bytesWritable.getBytes();
        int length = bytesWritable.getLength();
        lazySimpleDeserializeRead.set(bytes, 0, length);
        for (int index = 0; index < columnCount; index++) {
            if (useIncludeColumns && !columnsToInclude[index]) {
                lazySimpleDeserializeRead.skipNextField();
            } else if (index >= writeColumnCount) {
                // Should come back a null.
                verifyReadNull(lazySimpleDeserializeRead, typeInfos[index]);
            } else {
                Object expectedObject = row[index];
                verifyRead(lazySimpleDeserializeRead, typeInfos[index], expectedObject);
            }
        }
        if (writeColumnCount == columnCount) {
            assertTrue(lazySimpleDeserializeRead.isEndOfInputReached());
        }
    }
    // Try to deserialize using SerDe class our Writable row objects created by SerializeWrite.
    for (int rowIndex = 0; rowIndex < rowCount; rowIndex++) {
        BytesWritable bytesWritable = serializeWriteBytes[rowIndex];
        LazyStruct lazySimpleStruct = (LazyStruct) serde.deserialize(bytesWritable);
        Object[] row = rows[rowIndex];
        for (int index = 0; index < columnCount; index++) {
            TypeInfo typeInfo = typeInfos[index];
            Object expectedObject = row[index];
            Object object = lazySimpleStruct.getField(index);
            if (expectedObject == null || object == null) {
                if (expectedObject != null || object != null) {
                    fail("SerDe deserialized NULL column mismatch");
                }
            } else {
                if (!VerifyLazy.lazyCompare(typeInfo, object, expectedObject)) {
                    fail("SerDe deserialized value does not match");
                }
            }
        }
    }
    // One Writable per row.
    byte[][] serdeBytes = new byte[rowCount][];
    // Serialize using the SerDe, then below deserialize using DeserializeRead.
    Object[] serdeRow = new Object[columnCount];
    for (int i = 0; i < rowCount; i++) {
        Object[] row = rows[i];
        // LazySimple seems to work better with an row object array instead of a Java object...
        for (int index = 0; index < columnCount; index++) {
            serdeRow[index] = row[index];
        }
        Text serialized = (Text) serde.serialize(serdeRow, rowOI);
        byte[] bytes1 = Arrays.copyOfRange(serialized.getBytes(), 0, serialized.getLength());
        byte[] bytes2 = Arrays.copyOfRange(serializeWriteBytes[i].getBytes(), 0, serializeWriteBytes[i].getLength());
        if (!Arrays.equals(bytes1, bytes2)) {
            fail("SerializeWrite and SerDe serialization does not match");
        }
        serdeBytes[i] = copyBytes(serialized);
    }
    // Try to deserialize using DeserializeRead our Writable row objects created by SerDe.
    for (int i = 0; i < rowCount; i++) {
        Object[] row = rows[i];
        LazySimpleDeserializeRead lazySimpleDeserializeRead = new LazySimpleDeserializeRead(writeTypeInfos, /* useExternalBuffer */
        false, serdeParams);
        byte[] bytes = serdeBytes[i];
        lazySimpleDeserializeRead.set(bytes, 0, bytes.length);
        for (int index = 0; index < columnCount; index++) {
            if (useIncludeColumns && !columnsToInclude[index]) {
                lazySimpleDeserializeRead.skipNextField();
            } else if (index >= writeColumnCount) {
                // Should come back a null.
                verifyReadNull(lazySimpleDeserializeRead, typeInfos[index]);
            } else {
                Object expectedObject = row[index];
                verifyRead(lazySimpleDeserializeRead, typeInfos[index], expectedObject);
            }
        }
        if (writeColumnCount == columnCount) {
            assertTrue(lazySimpleDeserializeRead.isEndOfInputReached());
        }
    }
}
Also used : LazySimpleDeserializeRead(org.apache.hadoop.hive.serde2.lazy.fast.LazySimpleDeserializeRead) BytesWritable(org.apache.hadoop.io.BytesWritable) Text(org.apache.hadoop.io.Text) LazySimpleSerializeWrite(org.apache.hadoop.hive.serde2.lazy.fast.LazySimpleSerializeWrite) StructTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) Output(org.apache.hadoop.hive.serde2.ByteStream.Output) UnionObject(org.apache.hadoop.hive.serde2.objectinspector.UnionObject)

Example 12 with SerdeRandomRowSource

use of org.apache.hadoop.hive.serde2.SerdeRandomRowSource in project hive by apache.

the class TestLazySimpleFast method testLazySimpleFastCase.

public void testLazySimpleFastCase(int caseNum, boolean doNonRandomFill, Random r, SerdeRandomRowSource.SupportedTypes supportedTypes, int depth) throws Throwable {
    SerdeRandomRowSource source = new SerdeRandomRowSource();
    source.init(r, supportedTypes, depth);
    int rowCount = 100;
    Object[][] rows = source.randomRows(rowCount);
    if (doNonRandomFill) {
        MyTestClass.nonRandomRowFill(rows, source.primitiveCategories());
    }
    StructObjectInspector rowStructObjectInspector = source.rowStructObjectInspector();
    TypeInfo[] typeInfos = source.typeInfos();
    int columnCount = typeInfos.length;
    int writeColumnCount = columnCount;
    StructObjectInspector writeRowStructObjectInspector = rowStructObjectInspector;
    boolean doWriteFewerColumns = r.nextBoolean();
    if (doWriteFewerColumns) {
        writeColumnCount = 1 + r.nextInt(columnCount);
        if (writeColumnCount == columnCount) {
            doWriteFewerColumns = false;
        } else {
            writeRowStructObjectInspector = source.partialRowStructObjectInspector(writeColumnCount);
        }
    }
    String fieldNames = ObjectInspectorUtils.getFieldNames(rowStructObjectInspector);
    String fieldTypes = ObjectInspectorUtils.getFieldTypes(rowStructObjectInspector);
    // Use different separator values.
    byte[] separators = new byte[] { (byte) 9, (byte) 2, (byte) 3, (byte) 4, (byte) 5, (byte) 6, (byte) 7, (byte) 8 };
    LazySimpleSerDe serde = getSerDe(fieldNames, fieldTypes);
    LazySerDeParameters serdeParams = getSerDeParams(fieldNames, fieldTypes, separators);
    LazySimpleSerDe serde_fewer = null;
    LazySerDeParameters serdeParams_fewer = null;
    if (doWriteFewerColumns) {
        String partialFieldNames = ObjectInspectorUtils.getFieldNames(writeRowStructObjectInspector);
        String partialFieldTypes = ObjectInspectorUtils.getFieldTypes(writeRowStructObjectInspector);
        serde_fewer = getSerDe(fieldNames, fieldTypes);
        serdeParams_fewer = getSerDeParams(partialFieldNames, partialFieldTypes, separators);
    }
    testLazySimpleFast(source, rows, serde, rowStructObjectInspector, serde_fewer, writeRowStructObjectInspector, serdeParams, serdeParams_fewer, typeInfos, /* useIncludeColumns */
    false, /* doWriteFewerColumns */
    false, r);
    testLazySimpleFast(source, rows, serde, rowStructObjectInspector, serde_fewer, writeRowStructObjectInspector, serdeParams, serdeParams_fewer, typeInfos, /* useIncludeColumns */
    true, /* doWriteFewerColumns */
    false, r);
    if (doWriteFewerColumns) {
        testLazySimpleFast(source, rows, serde, rowStructObjectInspector, serde_fewer, writeRowStructObjectInspector, serdeParams, serdeParams_fewer, typeInfos, /* useIncludeColumns */
        false, /* doWriteFewerColumns */
        true, r);
        testLazySimpleFast(source, rows, serde, rowStructObjectInspector, serde_fewer, writeRowStructObjectInspector, serdeParams, serdeParams_fewer, typeInfos, /* useIncludeColumns */
        true, /* doWriteFewerColumns */
        true, r);
    }
}
Also used : SerdeRandomRowSource(org.apache.hadoop.hive.serde2.SerdeRandomRowSource) StructTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector)

Aggregations

Output (org.apache.hadoop.hive.serde2.ByteStream.Output)6 SerdeRandomRowSource (org.apache.hadoop.hive.serde2.SerdeRandomRowSource)6 StructObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector)6 BytesWritable (org.apache.hadoop.io.BytesWritable)6 PrimitiveTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo)5 TypeInfo (org.apache.hadoop.hive.serde2.typeinfo.TypeInfo)5 AbstractSerDe (org.apache.hadoop.hive.serde2.AbstractSerDe)4 UnionObject (org.apache.hadoop.hive.serde2.objectinspector.UnionObject)3 Writable (org.apache.hadoop.io.Writable)3 EOFException (java.io.EOFException)2 ArrayList (java.util.ArrayList)2 BinarySortableDeserializeRead (org.apache.hadoop.hive.serde2.binarysortable.fast.BinarySortableDeserializeRead)2 BinarySortableSerializeWrite (org.apache.hadoop.hive.serde2.binarysortable.fast.BinarySortableSerializeWrite)2 LazySimpleDeserializeRead (org.apache.hadoop.hive.serde2.lazy.fast.LazySimpleDeserializeRead)2 LazySimpleSerializeWrite (org.apache.hadoop.hive.serde2.lazy.fast.LazySimpleSerializeWrite)2 LazyBinaryDeserializeRead (org.apache.hadoop.hive.serde2.lazybinary.fast.LazyBinaryDeserializeRead)2 LazyBinarySerializeWrite (org.apache.hadoop.hive.serde2.lazybinary.fast.LazyBinarySerializeWrite)2 StructTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo)2 Text (org.apache.hadoop.io.Text)2