Search in sources :

Example 1 with LazyBinaryDeserializeRead

use of org.apache.hadoop.hive.serde2.lazybinary.fast.LazyBinaryDeserializeRead in project hive by apache.

the class TestVectorSerDeRow method testVectorSerializeRow.

void testVectorSerializeRow(Random r, SerializationType serializationType) throws HiveException, IOException, SerDeException {
    String[] emptyScratchTypeNames = new String[0];
    VectorRandomRowSource source = new VectorRandomRowSource();
    source.init(r);
    VectorizedRowBatchCtx batchContext = new VectorizedRowBatchCtx();
    batchContext.init(source.rowStructObjectInspector(), emptyScratchTypeNames);
    VectorizedRowBatch batch = batchContext.createVectorizedRowBatch();
    VectorAssignRow vectorAssignRow = new VectorAssignRow();
    vectorAssignRow.init(source.typeNames());
    int fieldCount = source.typeNames().size();
    DeserializeRead deserializeRead;
    SerializeWrite serializeWrite;
    switch(serializationType) {
        case BINARY_SORTABLE:
            deserializeRead = new BinarySortableDeserializeRead(source.primitiveTypeInfos(),
                    /* useExternalBuffer */ false);
            serializeWrite = new BinarySortableSerializeWrite(fieldCount);
            break;
        case LAZY_BINARY:
            deserializeRead = new LazyBinaryDeserializeRead(source.primitiveTypeInfos(),
                    /* useExternalBuffer */ false);
            serializeWrite = new LazyBinarySerializeWrite(fieldCount);
            break;
        case LAZY_SIMPLE:
            {
                StructObjectInspector rowObjectInspector = source.rowStructObjectInspector();
                LazySerDeParameters lazySerDeParams = getSerDeParams(rowObjectInspector);
                byte separator = (byte) '\t';
                deserializeRead = new LazySimpleDeserializeRead(source.primitiveTypeInfos(),
                        /* useExternalBuffer */ false, separator, lazySerDeParams);
                serializeWrite = new LazySimpleSerializeWrite(fieldCount, separator, lazySerDeParams);
            }
            break;
        default:
            throw new Error("Unknown serialization type " + serializationType);
    }
    VectorSerializeRow vectorSerializeRow = new VectorSerializeRow(serializeWrite);
    vectorSerializeRow.init(source.typeNames());
    Object[][] randomRows = source.randomRows(100000);
    int firstRandomRowIndex = 0;
    for (int i = 0; i < randomRows.length; i++) {
        Object[] row = randomRows[i];
        vectorAssignRow.assignRow(batch, batch.size, row);
        batch.size++;
        if (batch.size == batch.DEFAULT_SIZE) {
            serializeBatch(batch, vectorSerializeRow, deserializeRead, source, randomRows, firstRandomRowIndex);
            firstRandomRowIndex = i + 1;
            batch.reset();
        }
    }
    if (batch.size > 0) {
        serializeBatch(batch, vectorSerializeRow, deserializeRead, source, randomRows, firstRandomRowIndex);
    }
}
Also used : LazySerDeParameters(org.apache.hadoop.hive.serde2.lazy.LazySerDeParameters) DeserializeRead(org.apache.hadoop.hive.serde2.fast.DeserializeRead) BinarySortableDeserializeRead(org.apache.hadoop.hive.serde2.binarysortable.fast.BinarySortableDeserializeRead) LazySimpleDeserializeRead(org.apache.hadoop.hive.serde2.lazy.fast.LazySimpleDeserializeRead) LazyBinaryDeserializeRead(org.apache.hadoop.hive.serde2.lazybinary.fast.LazyBinaryDeserializeRead) SerializeWrite(org.apache.hadoop.hive.serde2.fast.SerializeWrite) BinarySortableSerializeWrite(org.apache.hadoop.hive.serde2.binarysortable.fast.BinarySortableSerializeWrite) LazyBinarySerializeWrite(org.apache.hadoop.hive.serde2.lazybinary.fast.LazyBinarySerializeWrite) LazySimpleSerializeWrite(org.apache.hadoop.hive.serde2.lazy.fast.LazySimpleSerializeWrite) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector)
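
All three branches of the switch above hand back a matched SerializeWrite / DeserializeRead pair that follows the same fast-SerDe contract: bind the writer to an Output, write each field in declaration order, then point the reader at the resulting bytes and read the fields back in the same order. Below is a minimal round-trip sketch for a single int column using the lazy-binary pair; the read-side readNextField()/currentInt conventions are my reading of the DeserializeRead API, so treat them as assumptions rather than quoted source.

import org.apache.hadoop.hive.serde2.ByteStream.Output;
import org.apache.hadoop.hive.serde2.lazybinary.fast.LazyBinaryDeserializeRead;
import org.apache.hadoop.hive.serde2.lazybinary.fast.LazyBinarySerializeWrite;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;

public class LazyBinaryRoundTripSketch {
    public static void main(String[] args) throws Exception {
        // Write side: bind to an Output, then write one int field.
        LazyBinarySerializeWrite writer = new LazyBinarySerializeWrite(1);
        Output output = new Output();
        writer.set(output);
        writer.writeInt(42);

        // Read side: same one-column schema, no external buffer, as in the test above.
        TypeInfo[] typeInfos = new TypeInfo[] { TypeInfoFactory.intTypeInfo };
        LazyBinaryDeserializeRead reader =
                new LazyBinaryDeserializeRead(typeInfos, /* useExternalBuffer */ false);
        reader.set(output.getData(), 0, output.getLength());

        // readNextField() returns false for a NULL field; otherwise the decoded
        // value lands in the public current* slot matching the column type.
        if (reader.readNextField()) {
            System.out.println(reader.currentInt); // prints 42
        }
    }
}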

Example 2 with LazyBinaryDeserializeRead

use of org.apache.hadoop.hive.serde2.lazybinary.fast.LazyBinaryDeserializeRead in project hive by apache.

the class TestLazyBinaryFast method testLazyBinaryFast.

private void testLazyBinaryFast(SerdeRandomRowSource source, Object[][] rows, AbstractSerDe serde, StructObjectInspector rowOI, AbstractSerDe serde_fewer, StructObjectInspector writeRowOI, PrimitiveTypeInfo[] primitiveTypeInfos, boolean useIncludeColumns, boolean doWriteFewerColumns, Random r) throws Throwable {
    int rowCount = rows.length;
    int columnCount = primitiveTypeInfos.length;
    boolean[] columnsToInclude = null;
    if (useIncludeColumns) {
        columnsToInclude = new boolean[columnCount];
        for (int i = 0; i < columnCount; i++) {
            columnsToInclude[i] = r.nextBoolean();
        }
    }
    int writeColumnCount = columnCount;
    PrimitiveTypeInfo[] writePrimitiveTypeInfos = primitiveTypeInfos;
    if (doWriteFewerColumns) {
        writeColumnCount = writeRowOI.getAllStructFieldRefs().size();
        writePrimitiveTypeInfos = Arrays.copyOf(primitiveTypeInfos, writeColumnCount);
    }
    LazyBinarySerializeWrite lazyBinarySerializeWrite = new LazyBinarySerializeWrite(writeColumnCount);
    // Try to serialize
    BytesWritable[] serializeWriteBytes = new BytesWritable[rowCount];
    for (int i = 0; i < rowCount; i++) {
        Object[] row = rows[i];
        Output output = new Output();
        lazyBinarySerializeWrite.set(output);
        for (int index = 0; index < writeColumnCount; index++) {
            Writable writable = (Writable) row[index];
            VerifyFast.serializeWrite(lazyBinarySerializeWrite, primitiveTypeInfos[index], writable);
        }
        BytesWritable bytesWritable = new BytesWritable();
        bytesWritable.set(output.getData(), 0, output.getLength());
        serializeWriteBytes[i] = bytesWritable;
    }
    // Try to deserialize
    for (int i = 0; i < rowCount; i++) {
        Object[] row = rows[i];
        // Specifying the right type info length tells LazyBinaryDeserializeRead which is the last
        // column.
        LazyBinaryDeserializeRead lazyBinaryDeserializeRead =
                new LazyBinaryDeserializeRead(writePrimitiveTypeInfos, /* useExternalBuffer */ false);
        BytesWritable bytesWritable = serializeWriteBytes[i];
        lazyBinaryDeserializeRead.set(bytesWritable.getBytes(), 0, bytesWritable.getLength());
        for (int index = 0; index < columnCount; index++) {
            if (useIncludeColumns && !columnsToInclude[index]) {
                lazyBinaryDeserializeRead.skipNextField();
            } else if (index >= writeColumnCount) {
                // Should come back as null.
                VerifyFast.verifyDeserializeRead(lazyBinaryDeserializeRead, primitiveTypeInfos[index], null);
            } else {
                Writable writable = (Writable) row[index];
                VerifyFast.verifyDeserializeRead(lazyBinaryDeserializeRead, primitiveTypeInfos[index], writable);
            }
        }
        if (writeColumnCount == columnCount) {
            TestCase.assertTrue(lazyBinaryDeserializeRead.isEndOfInputReached());
        }
    }
    // Try to deserialize, using the SerDe class, the Writable row objects created by SerializeWrite.
    for (int i = 0; i < rowCount; i++) {
        BytesWritable bytesWritable = serializeWriteBytes[i];
        LazyBinaryStruct lazyBinaryStruct;
        if (doWriteFewerColumns) {
            lazyBinaryStruct = (LazyBinaryStruct) serde_fewer.deserialize(bytesWritable);
        } else {
            lazyBinaryStruct = (LazyBinaryStruct) serde.deserialize(bytesWritable);
        }
        Object[] row = rows[i];
        for (int index = 0; index < writeColumnCount; index++) {
            PrimitiveTypeInfo primitiveTypeInfo = primitiveTypeInfos[index];
            Writable writable = (Writable) row[index];
            Object object = lazyBinaryStruct.getField(index);
            if (writable == null || object == null) {
                if (writable != null || object != null) {
                    fail("SerDe deserialized NULL column mismatch");
                }
            } else {
                if (!object.equals(writable)) {
                    fail("SerDe deserialized value does not match");
                }
            }
        }
    }
    // One Writable per row.
    BytesWritable[] serdeBytes = new BytesWritable[rowCount];
    // Serialize using the SerDe, then below deserialize using DeserializeRead.
    Object[] serdeRow = new Object[writeColumnCount];
    for (int i = 0; i < rowCount; i++) {
        Object[] row = rows[i];
        // LazyBinary seems to work better with a row object array instead of a Java object...
        for (int index = 0; index < writeColumnCount; index++) {
            serdeRow[index] = row[index];
        }
        BytesWritable serialized;
        if (doWriteFewerColumns) {
            serialized = (BytesWritable) serde_fewer.serialize(serdeRow, writeRowOI);
        } else {
            serialized = (BytesWritable) serde.serialize(serdeRow, rowOI);
        }
        BytesWritable bytesWritable = new BytesWritable(Arrays.copyOfRange(serialized.getBytes(), 0, serialized.getLength()));
        byte[] bytes1 = bytesWritable.getBytes();
        BytesWritable lazySerializedWriteBytes = serializeWriteBytes[i];
        byte[] bytes2 = Arrays.copyOfRange(lazySerializedWriteBytes.getBytes(), 0, lazySerializedWriteBytes.getLength());
        if (bytes1.length != bytes2.length) {
            fail("SerializeWrite length " + bytes2.length + " and " + "SerDe serialization length " + bytes1.length + " do not match (" + Arrays.toString(primitiveTypeInfos) + ")");
        }
        if (!Arrays.equals(bytes1, bytes2)) {
            fail("SerializeWrite and SerDe serialization does not match (" + Arrays.toString(primitiveTypeInfos) + ")");
        }
        serdeBytes[i] = bytesWritable;
    }
    // Try to deserialize, using DeserializeRead, the Writable row objects created by the SerDe.
    for (int i = 0; i < rowCount; i++) {
        Object[] row = rows[i];
        // When doWriteFewerColumns, try to read more fields than exist in the buffer.
        LazyBinaryDeserializeRead lazyBinaryDeserializeRead =
                new LazyBinaryDeserializeRead(primitiveTypeInfos, /* useExternalBuffer */ false);
        BytesWritable bytesWritable = serdeBytes[i];
        lazyBinaryDeserializeRead.set(bytesWritable.getBytes(), 0, bytesWritable.getLength());
        for (int index = 0; index < columnCount; index++) {
            if (useIncludeColumns && !columnsToInclude[index]) {
                lazyBinaryDeserializeRead.skipNextField();
            } else if (index >= writeColumnCount) {
                // Should come back as null.
                VerifyFast.verifyDeserializeRead(lazyBinaryDeserializeRead, primitiveTypeInfos[index], null);
            } else {
                Writable writable = (Writable) row[index];
                VerifyFast.verifyDeserializeRead(lazyBinaryDeserializeRead, primitiveTypeInfos[index], writable);
            }
        }
        if (writeColumnCount == columnCount) {
            TestCase.assertTrue(lazyBinaryDeserializeRead.isEndOfInputReached());
        }
    }
}
Also used : LazyBinarySerializeWrite(org.apache.hadoop.hive.serde2.lazybinary.fast.LazyBinarySerializeWrite) Writable(org.apache.hadoop.io.Writable) BytesWritable(org.apache.hadoop.io.BytesWritable) PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo) Output(org.apache.hadoop.hive.serde2.ByteStream.Output) LazyBinaryDeserializeRead(org.apache.hadoop.hive.serde2.lazybinary.fast.LazyBinaryDeserializeRead)
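
One detail in the deserialize loops above is worth isolating: DeserializeRead consumes fields strictly in order, so a column you do not want must still be skipped with skipNextField(), never silently ignored. Here is a hedged sketch of that projection loop, assuming an all-int schema for brevity and relying on readNextField() returning false for NULL fields (my reading of the DeserializeRead contract):

import java.io.IOException;
import java.util.Arrays;
import org.apache.hadoop.hive.serde2.lazybinary.fast.LazyBinaryDeserializeRead;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;

public class ProjectedReadSketch {
    // Read one serialized row, keeping only the columns flagged in include[].
    static void readProjected(byte[] bytes, int offset, int length, boolean[] include)
            throws IOException {
        TypeInfo[] typeInfos = new TypeInfo[include.length];
        Arrays.fill(typeInfos, TypeInfoFactory.intTypeInfo);
        LazyBinaryDeserializeRead reader =
                new LazyBinaryDeserializeRead(typeInfos, /* useExternalBuffer */ false);
        reader.set(bytes, offset, length);
        for (int index = 0; index < include.length; index++) {
            if (!include[index]) {
                // Excluded columns still occupy the stream: skip, don't ignore.
                reader.skipNextField();
            } else if (reader.readNextField()) {
                System.out.println("col " + index + " = " + reader.currentInt);
            } else {
                System.out.println("col " + index + " = NULL");
            }
        }
    }
}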

Example 3 with LazyBinaryDeserializeRead

use of org.apache.hadoop.hive.serde2.lazybinary.fast.LazyBinaryDeserializeRead in project hive by apache.

the class ReduceRecordSource method init.

void init(JobConf jconf, Operator<?> reducer, boolean vectorized, TableDesc keyTableDesc, TableDesc valueTableDesc, Reader reader, boolean handleGroupKey, byte tag, VectorizedRowBatchCtx batchContext, long vectorizedVertexNum, int vectorizedTestingReducerBatchSize) throws Exception {
    this.vectorizedVertexNum = vectorizedVertexNum;
    if (vectorizedTestingReducerBatchSize > VectorizedRowBatch.DEFAULT_SIZE) {
        // For now, we don't go higher than the default batch size unless we do more work
        // to verify every vectorized operator downstream can handle a larger batch size.
        vectorizedTestingReducerBatchSize = VectorizedRowBatch.DEFAULT_SIZE;
    }
    this.vectorizedTestingReducerBatchSize = vectorizedTestingReducerBatchSize;
    ObjectInspector keyObjectInspector;
    this.reducer = reducer;
    this.vectorized = vectorized;
    this.keyTableDesc = keyTableDesc;
    if (reader instanceof KeyValueReader) {
        this.reader = new KeyValuesFromKeyValue((KeyValueReader) reader);
    } else {
        this.reader = new KeyValuesFromKeyValues((KeyValuesReader) reader);
    }
    this.handleGroupKey = handleGroupKey;
    this.tag = tag;
    try {
        inputKeyDeserializer = ReflectionUtils.newInstance(keyTableDesc.getDeserializerClass(), null);
        SerDeUtils.initializeSerDe(inputKeyDeserializer, null, keyTableDesc.getProperties(), null);
        keyObjectInspector = inputKeyDeserializer.getObjectInspector();
        if (vectorized) {
            keyStructInspector = (StructObjectInspector) keyObjectInspector;
            firstValueColumnOffset = keyStructInspector.getAllStructFieldRefs().size();
        }
        // We should initialize the SerDe with the TypeInfo when available.
        this.valueTableDesc = valueTableDesc;
        inputValueDeserializer = (AbstractSerDe) ReflectionUtils.newInstance(valueTableDesc.getDeserializerClass(), null);
        SerDeUtils.initializeSerDe(inputValueDeserializer, null, valueTableDesc.getProperties(), null);
        valueObjectInspector = inputValueDeserializer.getObjectInspector();
        ArrayList<ObjectInspector> ois = new ArrayList<ObjectInspector>();
        if (vectorized) {
            /* vectorization only works with struct object inspectors */
            valueStructInspectors = (StructObjectInspector) valueObjectInspector;
            final int totalColumns = firstValueColumnOffset + valueStructInspectors.getAllStructFieldRefs().size();
            rowObjectInspector = Utilities.constructVectorizedReduceRowOI(keyStructInspector, valueStructInspectors);
            batch = batchContext.createVectorizedRowBatch();
            // Setup vectorized deserialization for the key and value.
            BinarySortableSerDe binarySortableSerDe = (BinarySortableSerDe) inputKeyDeserializer;
            keyBinarySortableDeserializeToRow =
                    new VectorDeserializeRow<BinarySortableDeserializeRead>(
                            new BinarySortableDeserializeRead(
                                    VectorizedBatchUtil.typeInfosFromStructObjectInspector(keyStructInspector),
                                    /* useExternalBuffer */ true,
                                    binarySortableSerDe.getSortOrders(),
                                    binarySortableSerDe.getNullMarkers(),
                                    binarySortableSerDe.getNotNullMarkers()));
            keyBinarySortableDeserializeToRow.init(0);
            final int valuesSize = valueStructInspectors.getAllStructFieldRefs().size();
            if (valuesSize > 0) {
                valueLazyBinaryDeserializeToRow =
                        new VectorDeserializeRow<LazyBinaryDeserializeRead>(
                                new LazyBinaryDeserializeRead(
                                        VectorizedBatchUtil.typeInfosFromStructObjectInspector(valueStructInspectors),
                                        /* useExternalBuffer */ true));
                valueLazyBinaryDeserializeToRow.init(firstValueColumnOffset);
                // Create data buffers for value bytes column vectors.
                for (int i = firstValueColumnOffset; i < batch.numCols; i++) {
                    ColumnVector colVector = batch.cols[i];
                    if (colVector instanceof BytesColumnVector) {
                        BytesColumnVector bytesColumnVector = (BytesColumnVector) colVector;
                        bytesColumnVector.initBuffer();
                    }
                }
            }
        } else {
            ois.add(keyObjectInspector);
            ois.add(valueObjectInspector);
            rowObjectInspector = ObjectInspectorFactory.getStandardStructObjectInspector(Utilities.reduceFieldNameList, ois);
        }
    } catch (Throwable e) {
        abort = true;
        if (e instanceof OutOfMemoryError) {
            // Don't create a new object if we are already out of memory
            throw (OutOfMemoryError) e;
        } else {
            throw new RuntimeException("Reduce operator initialization failed", e);
        }
    }
    perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.TEZ_INIT_OPERATORS);
}
Also used : ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector) BinarySortableSerDe(org.apache.hadoop.hive.serde2.binarysortable.BinarySortableSerDe) KeyValueReader(org.apache.tez.runtime.library.api.KeyValueReader) KeyValuesReader(org.apache.tez.runtime.library.api.KeyValuesReader) ArrayList(java.util.ArrayList) BinarySortableDeserializeRead(org.apache.hadoop.hive.serde2.binarysortable.fast.BinarySortableDeserializeRead) BytesColumnVector(org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector) ColumnVector(org.apache.hadoop.hive.ql.exec.vector.ColumnVector) LazyBinaryDeserializeRead(org.apache.hadoop.hive.serde2.lazybinary.fast.LazyBinaryDeserializeRead)
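
After this initialization, each reduce-side value record is deserialized straight into the vectorized batch at the next free row slot, offset past the key columns by the init(firstValueColumnOffset) call above. The per-record step typically looks like the sketch below; the setBytes()/deserialize() pair is my reading of the VectorDeserializeRow API, and the flush threshold reuses VectorizedRowBatch.DEFAULT_SIZE, so treat the details as assumptions rather than the class's verbatim code.

// Hedged sketch: push one serialized value row into the vectorized batch.
// valueLazyBinaryDeserializeToRow, batch, reducer, and tag are the fields
// initialized in init() above; setBytes()/deserialize() are assumed signatures.
private void bufferValueRow(byte[] valueBytes, int offset, int length) throws Exception {
    valueLazyBinaryDeserializeToRow.setBytes(valueBytes, offset, length);
    valueLazyBinaryDeserializeToRow.deserialize(batch, batch.size);
    batch.size++;
    if (batch.size == VectorizedRowBatch.DEFAULT_SIZE) {
        // Forward the full batch downstream, then start filling a fresh one.
        reducer.process(batch, tag);
        batch.reset();
    }
}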

Example 4 with LazyBinaryDeserializeRead

use of org.apache.hadoop.hive.serde2.lazybinary.fast.LazyBinaryDeserializeRead in project hive by apache.

the class CheckFastRowHashMap method verifyHashMapRows.

public static void verifyHashMapRows(List<Object[]> rows, int[] actualToValueMap, VectorMapJoinHashMapResult hashMapResult, TypeInfo[] typeInfos) throws IOException {
    final int count = rows.size();
    final int columnCount = typeInfos.length;
    WriteBuffers.ByteSegmentRef ref = hashMapResult.first();
    for (int a = 0; a < count; a++) {
        int valueIndex = actualToValueMap[a];
        Object[] row = rows.get(valueIndex);
        byte[] bytes = ref.getBytes();
        int offset = (int) ref.getOffset();
        int length = ref.getLength();
        LazyBinaryDeserializeRead lazyBinaryDeserializeRead =
                new LazyBinaryDeserializeRead(typeInfos, /* useExternalBuffer */ false);
        lazyBinaryDeserializeRead.set(bytes, offset, length);
        for (int index = 0; index < columnCount; index++) {
            verifyRead(lazyBinaryDeserializeRead, typeInfos[index], row[index]);
        }
        TestCase.assertTrue(lazyBinaryDeserializeRead.isEndOfInputReached());
        ref = hashMapResult.next();
        if (a == count - 1) {
            TestCase.assertTrue(ref == null);
        } else {
            TestCase.assertTrue(ref != null);
        }
    }
}
Also used : WriteBuffers(org.apache.hadoop.hive.serde2.WriteBuffers) UnionObject(org.apache.hadoop.hive.serde2.objectinspector.UnionObject) LazyBinaryDeserializeRead(org.apache.hadoop.hive.serde2.lazybinary.fast.LazyBinaryDeserializeRead)
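
The cursor protocol this test exercises is small enough to isolate: first() positions on the first value stored under the probed key, next() advances, and a null return marks the end of the chain, which is exactly what the trailing assertions verify. A minimal sketch, with the VectorMapJoinHashMapResult import path given as I recall it:

import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinHashMapResult;
import org.apache.hadoop.hive.serde2.WriteBuffers;

public class HashMapResultCursorSketch {
    // Count every value row stored under one key. Each ByteSegmentRef exposes
    // the row's bytes via getBytes()/getOffset()/getLength(), as used above.
    static int countValues(VectorMapJoinHashMapResult hashMapResult) {
        int n = 0;
        for (WriteBuffers.ByteSegmentRef ref = hashMapResult.first();
                ref != null;
                ref = hashMapResult.next()) {
            n++;
        }
        return n;
    }
}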

Example 5 with LazyBinaryDeserializeRead

use of org.apache.hadoop.hive.serde2.lazybinary.fast.LazyBinaryDeserializeRead in project hive by apache.

the class TestLazyBinaryFast method testLazyBinaryFast.

private void testLazyBinaryFast(SerdeRandomRowSource source, Object[][] rows, AbstractSerDe serde, StructObjectInspector rowOI, AbstractSerDe serde_fewer, StructObjectInspector writeRowOI, TypeInfo[] typeInfos, boolean useIncludeColumns, boolean doWriteFewerColumns, Random r) throws Throwable {
    int rowCount = rows.length;
    int columnCount = typeInfos.length;
    boolean[] columnsToInclude = null;
    if (useIncludeColumns) {
        columnsToInclude = new boolean[columnCount];
        for (int i = 0; i < columnCount; i++) {
            columnsToInclude[i] = r.nextBoolean();
        }
    }
    int writeColumnCount = columnCount;
    TypeInfo[] writeTypeInfos = typeInfos;
    if (doWriteFewerColumns) {
        writeColumnCount = writeRowOI.getAllStructFieldRefs().size();
        writeTypeInfos = Arrays.copyOf(typeInfos, writeColumnCount);
    }
    LazyBinarySerializeWrite lazyBinarySerializeWrite = new LazyBinarySerializeWrite(writeColumnCount);
    // Try to serialize
    BytesWritable[] serializeWriteBytes = new BytesWritable[rowCount];
    for (int i = 0; i < rowCount; i++) {
        Object[] row = rows[i];
        Output output = new Output();
        lazyBinarySerializeWrite.set(output);
        for (int index = 0; index < writeColumnCount; index++) {
            VerifyFast.serializeWrite(lazyBinarySerializeWrite, typeInfos[index], row[index]);
        }
        BytesWritable bytesWritable = new BytesWritable();
        bytesWritable.set(output.getData(), 0, output.getLength());
        serializeWriteBytes[i] = bytesWritable;
    }
    // Try to deserialize
    for (int i = 0; i < rowCount; i++) {
        Object[] row = rows[i];
        // Specifying the right type info length tells LazyBinaryDeserializeRead which is the last
        // column.
        LazyBinaryDeserializeRead lazyBinaryDeserializeRead =
                new LazyBinaryDeserializeRead(writeTypeInfos, /* useExternalBuffer */ false);
        BytesWritable bytesWritable = serializeWriteBytes[i];
        lazyBinaryDeserializeRead.set(bytesWritable.getBytes(), 0, bytesWritable.getLength());
        for (int index = 0; index < columnCount; index++) {
            if (useIncludeColumns && !columnsToInclude[index]) {
                lazyBinaryDeserializeRead.skipNextField();
            } else if (index >= writeColumnCount) {
                // Should come back as null.
                VerifyFast.verifyDeserializeRead(lazyBinaryDeserializeRead, typeInfos[index], null);
            } else {
                verifyRead(lazyBinaryDeserializeRead, typeInfos[index], row[index]);
            }
        }
        if (writeColumnCount == columnCount) {
            TestCase.assertTrue(lazyBinaryDeserializeRead.isEndOfInputReached());
        }
    }
    // Try to deserialize, using the SerDe class, the Writable row objects created by SerializeWrite.
    for (int i = 0; i < rowCount; i++) {
        BytesWritable bytesWritable = serializeWriteBytes[i];
        LazyBinaryStruct lazyBinaryStruct;
        if (doWriteFewerColumns) {
            lazyBinaryStruct = (LazyBinaryStruct) serde_fewer.deserialize(bytesWritable);
        } else {
            lazyBinaryStruct = (LazyBinaryStruct) serde.deserialize(bytesWritable);
        }
        Object[] row = rows[i];
        for (int index = 0; index < writeColumnCount; index++) {
            TypeInfo typeInfo = typeInfos[index];
            Object object = lazyBinaryStruct.getField(index);
            if (row[index] == null || object == null) {
                if (row[index] != null || object != null) {
                    fail("SerDe deserialized NULL column mismatch");
                }
            } else {
                if (!VerifyLazy.lazyCompare(typeInfo, object, row[index])) {
                    fail("SerDe deserialized value does not match");
                }
            }
        }
    }
    // One Writable per row.
    BytesWritable[] serdeBytes = new BytesWritable[rowCount];
    // Serialize using the SerDe, then below deserialize using DeserializeRead.
    Object[] serdeRow = new Object[writeColumnCount];
    for (int i = 0; i < rowCount; i++) {
        Object[] row = rows[i];
        // LazyBinary seems to work better with a row object array instead of a Java object...
        for (int index = 0; index < writeColumnCount; index++) {
            serdeRow[index] = row[index];
        }
        BytesWritable serialized;
        if (doWriteFewerColumns) {
            serialized = (BytesWritable) serde_fewer.serialize(serdeRow, writeRowOI);
        } else {
            serialized = (BytesWritable) serde.serialize(serdeRow, rowOI);
        }
        BytesWritable bytesWritable = new BytesWritable(Arrays.copyOfRange(serialized.getBytes(), 0, serialized.getLength()));
        byte[] bytes1 = bytesWritable.getBytes();
        BytesWritable lazySerializedWriteBytes = serializeWriteBytes[i];
        byte[] bytes2 = Arrays.copyOfRange(lazySerializedWriteBytes.getBytes(), 0, lazySerializedWriteBytes.getLength());
        if (bytes1.length != bytes2.length) {
            fail("SerializeWrite length " + bytes2.length + " and " + "SerDe serialization length " + bytes1.length + " do not match (" + Arrays.toString(typeInfos) + ")");
        }
        if (!Arrays.equals(bytes1, bytes2)) {
            fail("SerializeWrite and SerDe serialization does not match (" + Arrays.toString(typeInfos) + ")");
        }
        serdeBytes[i] = bytesWritable;
    }
    // Try to deserialize, using DeserializeRead, the Writable row objects created by the SerDe.
    for (int i = 0; i < rowCount; i++) {
        Object[] row = rows[i];
        // When doWriteFewerColumns, try to read more fields than exist in the buffer.
        LazyBinaryDeserializeRead lazyBinaryDeserializeRead =
                new LazyBinaryDeserializeRead(typeInfos, /* useExternalBuffer */ false);
        BytesWritable bytesWritable = serdeBytes[i];
        lazyBinaryDeserializeRead.set(bytesWritable.getBytes(), 0, bytesWritable.getLength());
        for (int index = 0; index < columnCount; index++) {
            if (useIncludeColumns && !columnsToInclude[index]) {
                lazyBinaryDeserializeRead.skipNextField();
            } else if (index >= writeColumnCount) {
                // Should come back as null.
                VerifyFast.verifyDeserializeRead(lazyBinaryDeserializeRead, typeInfos[index], null);
            } else {
                verifyRead(lazyBinaryDeserializeRead, typeInfos[index], row[index]);
            }
        }
        if (writeColumnCount == columnCount) {
            TestCase.assertTrue(lazyBinaryDeserializeRead.isEndOfInputReached());
        }
    }
}
Also used : LazyBinarySerializeWrite(org.apache.hadoop.hive.serde2.lazybinary.fast.LazyBinarySerializeWrite) BytesWritable(org.apache.hadoop.io.BytesWritable) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) Output(org.apache.hadoop.hive.serde2.ByteStream.Output) UnionObject(org.apache.hadoop.hive.serde2.objectinspector.UnionObject) LazyBinaryDeserializeRead(org.apache.hadoop.hive.serde2.lazybinary.fast.LazyBinaryDeserializeRead)
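
The doWriteFewerColumns variant is the subtle case in both TestLazyBinaryFast overloads: the reader is constructed with more columns than the buffer actually holds, and each trailing field is expected to come back as NULL rather than fail. A small sketch of just that behavior, assuming (as the assertions above imply) that readNextField() returns false once the input is exhausted:

import org.apache.hadoop.hive.serde2.ByteStream.Output;
import org.apache.hadoop.hive.serde2.lazybinary.fast.LazyBinaryDeserializeRead;
import org.apache.hadoop.hive.serde2.lazybinary.fast.LazyBinarySerializeWrite;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;

public class FewerColumnsSketch {
    public static void main(String[] args) throws Exception {
        // Write only two int fields...
        LazyBinarySerializeWrite writer = new LazyBinarySerializeWrite(2);
        Output output = new Output();
        writer.set(output);
        writer.writeInt(1);
        writer.writeInt(2);

        // ...but read with a three-column schema, as the fewer-columns test does.
        TypeInfo[] typeInfos = new TypeInfo[] {
            TypeInfoFactory.intTypeInfo, TypeInfoFactory.intTypeInfo, TypeInfoFactory.intTypeInfo
        };
        LazyBinaryDeserializeRead reader =
                new LazyBinaryDeserializeRead(typeInfos, /* useExternalBuffer */ false);
        reader.set(output.getData(), 0, output.getLength());

        reader.readNextField(); // col 0 -> currentInt == 1
        reader.readNextField(); // col 1 -> currentInt == 2
        // No bytes remain for the third column; it should come back as NULL,
        // i.e. readNextField() returns false (assumption based on the tests above).
        System.out.println("trailing column null? " + !reader.readNextField());
    }
}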

Aggregations

LazyBinaryDeserializeRead (org.apache.hadoop.hive.serde2.lazybinary.fast.LazyBinaryDeserializeRead) 13
StructObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector) 8
BinarySortableDeserializeRead (org.apache.hadoop.hive.serde2.binarysortable.fast.BinarySortableDeserializeRead) 7
LazyBinarySerializeWrite (org.apache.hadoop.hive.serde2.lazybinary.fast.LazyBinarySerializeWrite) 7
UnionObject (org.apache.hadoop.hive.serde2.objectinspector.UnionObject) 5
IOException (java.io.IOException) 4
Output (org.apache.hadoop.hive.serde2.ByteStream.Output) 4
BinarySortableSerializeWrite (org.apache.hadoop.hive.serde2.binarysortable.fast.BinarySortableSerializeWrite) 4
DeserializeRead (org.apache.hadoop.hive.serde2.fast.DeserializeRead) 4
SerializeWrite (org.apache.hadoop.hive.serde2.fast.SerializeWrite) 4
LazySerDeParameters (org.apache.hadoop.hive.serde2.lazy.LazySerDeParameters) 4
LazySimpleDeserializeRead (org.apache.hadoop.hive.serde2.lazy.fast.LazySimpleDeserializeRead) 4
LazySimpleSerializeWrite (org.apache.hadoop.hive.serde2.lazy.fast.LazySimpleSerializeWrite) 4
ArrayList (java.util.ArrayList) 3
BytesColumnVector (org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector) 3
ColumnVector (org.apache.hadoop.hive.ql.exec.vector.ColumnVector) 3
HiveException (org.apache.hadoop.hive.ql.metadata.HiveException) 3
SerDeException (org.apache.hadoop.hive.serde2.SerDeException) 3
BinarySortableSerDe (org.apache.hadoop.hive.serde2.binarysortable.BinarySortableSerDe) 3
ObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) 3