Search in sources :

Example 1 with DeserializeRead

use of org.apache.hadoop.hive.serde2.fast.DeserializeRead in project hive by apache.

In class TestVectorSerDeRow, method deserializeAndVerify.

/**
 * Reads one serialized row back through {@code deserializeRead} and checks every field against
 * the corresponding entry of {@code expectedRow}.
 *
 * <p>The reader is pointed at the bytes held by {@code output}; fields are then read in column
 * order, one per primitive category reported by {@code source}. A value mismatch fails the test
 * via {@code TestCase.fail}; after the last column the reader must report end of input.
 *
 * @param output buffer holding the serialized row (bytes 0 .. {@code output.getLength()})
 * @param deserializeRead reader used to decode the row
 * @param source supplies the primitive category and type info of each column
 * @param expectedRow expected Writable objects, one per column
 * @throws HiveException if a field unexpectedly reads as NULL or a column has an unsupported
 *     primitive category
 * @throws IOException declared for the underlying read calls
 */
void deserializeAndVerify(Output output, DeserializeRead deserializeRead, VectorRandomRowSource source, Object[] expectedRow) throws HiveException, IOException {
    deserializeRead.set(output.getData(), 0, output.getLength());
    PrimitiveCategory[] primitiveCategories = source.primitiveCategories();
    for (int i = 0; i < primitiveCategories.length; i++) {
        Object expected = expectedRow[i];
        PrimitiveCategory primitiveCategory = primitiveCategories[i];
        PrimitiveTypeInfo primitiveTypeInfo = source.primitiveTypeInfos()[i];
        // A false return is treated as "field was NULL", which this verifier never expects.
        if (!deserializeRead.readNextField()) {
            throw new HiveException("Unexpected NULL when reading primitiveCategory " + primitiveCategory + " expected (" + expected.getClass().getName() + ", " + expected.toString() + ") " + " deserializeRead " + deserializeRead.getClass().getName());
        }
        switch(primitiveCategory) {
            case BOOLEAN:
                {
                    Boolean value = deserializeRead.currentBoolean;
                    BooleanWritable expectedWritable = (BooleanWritable) expected;
                    if (!value.equals(expectedWritable.get())) {
                        TestCase.fail("Boolean field mismatch (expected " + expected + " found " + value + ")");
                    }
                }
                break;
            case BYTE:
                {
                    Byte value = deserializeRead.currentByte;
                    ByteWritable expectedWritable = (ByteWritable) expected;
                    if (!value.equals(expectedWritable.get())) {
                        // Print both sides as numbers. The expected value must come from the
                        // writable: casting the Object reference itself to int would throw
                        // ClassCastException and hide the real mismatch.
                        TestCase.fail("Byte field mismatch (expected " + (int) expectedWritable.get() + " found " + value.intValue() + ")");
                    }
                }
                break;
            case SHORT:
                {
                    Short value = deserializeRead.currentShort;
                    ShortWritable expectedWritable = (ShortWritable) expected;
                    if (!value.equals(expectedWritable.get())) {
                        TestCase.fail("Short field mismatch (expected " + expected + " found " + value + ")");
                    }
                }
                break;
            case INT:
                {
                    Integer value = deserializeRead.currentInt;
                    IntWritable expectedWritable = (IntWritable) expected;
                    if (!value.equals(expectedWritable.get())) {
                        TestCase.fail("Int field mismatch (expected " + expected + " found " + value + ")");
                    }
                }
                break;
            case LONG:
                {
                    Long value = deserializeRead.currentLong;
                    LongWritable expectedWritable = (LongWritable) expected;
                    if (!value.equals(expectedWritable.get())) {
                        TestCase.fail("Long field mismatch (expected " + expected + " found " + value + ")");
                    }
                }
                break;
            case DATE:
                {
                    DateWritable value = deserializeRead.currentDateWritable;
                    DateWritable expectedWritable = (DateWritable) expected;
                    if (!value.equals(expectedWritable)) {
                        TestCase.fail("Date field mismatch (expected " + expected.toString() + " found " + value.toString() + ")");
                    }
                }
                break;
            case FLOAT:
                {
                    Float value = deserializeRead.currentFloat;
                    FloatWritable expectedWritable = (FloatWritable) expected;
                    if (!value.equals(expectedWritable.get())) {
                        TestCase.fail("Float field mismatch (expected " + expected + " found " + value + ")");
                    }
                }
                break;
            case DOUBLE:
                {
                    Double value = deserializeRead.currentDouble;
                    DoubleWritable expectedWritable = (DoubleWritable) expected;
                    if (!value.equals(expectedWritable.get())) {
                        TestCase.fail("Double field mismatch (expected " + expected + " found " + value + ")");
                    }
                }
                break;
            case STRING:
            case CHAR:
            case VARCHAR:
            case BINARY:
                {
                    // All four categories are exposed by the reader as a byte range; copy it out
                    // once and interpret it per category below.
                    byte[] stringBytes = Arrays.copyOfRange(deserializeRead.currentBytes, deserializeRead.currentBytesStart, deserializeRead.currentBytesStart + deserializeRead.currentBytesLength);
                    Text text = new Text(stringBytes);
                    String string = text.toString();
                    switch(primitiveCategory) {
                        case STRING:
                            {
                                Text expectedWritable = (Text) expected;
                                if (!string.equals(expectedWritable.toString())) {
                                    TestCase.fail("String field mismatch (expected '" + expectedWritable.toString() + "' found '" + string + "')");
                                }
                            }
                            break;
                        case CHAR:
                            {
                                HiveChar hiveChar = new HiveChar(string, ((CharTypeInfo) primitiveTypeInfo).getLength());
                                HiveCharWritable expectedWritable = (HiveCharWritable) expected;
                                if (!hiveChar.equals(expectedWritable.getHiveChar())) {
                                    TestCase.fail("Char field mismatch (expected '" + expectedWritable.getHiveChar() + "' found '" + hiveChar + "')");
                                }
                            }
                            break;
                        case VARCHAR:
                            {
                                HiveVarchar hiveVarchar = new HiveVarchar(string, ((VarcharTypeInfo) primitiveTypeInfo).getLength());
                                HiveVarcharWritable expectedWritable = (HiveVarcharWritable) expected;
                                if (!hiveVarchar.equals(expectedWritable.getHiveVarchar())) {
                                    TestCase.fail("Varchar field mismatch (expected '" + expectedWritable.getHiveVarchar() + "' found '" + hiveVarchar + "')");
                                }
                            }
                            break;
                        case BINARY:
                            {
                                BytesWritable expectedWritable = (BytesWritable) expected;
                                // Arrays.toString renders the actual bytes; concatenating the
                                // array directly would only print its identity string.
                                if (stringBytes.length != expectedWritable.getLength()) {
                                    TestCase.fail("Byte Array field mismatch (expected " + expected + " found " + Arrays.toString(stringBytes) + ")");
                                }
                                // Only the first getLength() bytes of the backing array are
                                // compared; the lengths were checked equal just above.
                                byte[] expectedBytes = expectedWritable.getBytes();
                                for (int b = 0; b < stringBytes.length; b++) {
                                    if (stringBytes[b] != expectedBytes[b]) {
                                        TestCase.fail("Byte Array field mismatch (expected " + expected + " found " + Arrays.toString(stringBytes) + ")");
                                    }
                                }
                            }
                            break;
                        default:
                            throw new HiveException("Unexpected primitive category " + primitiveCategory);
                    }
                }
                break;
            case DECIMAL:
                {
                    HiveDecimal value = deserializeRead.currentHiveDecimalWritable.getHiveDecimal();
                    if (value == null) {
                        TestCase.fail("Decimal field evaluated to NULL");
                    }
                    HiveDecimalWritable expectedWritable = (HiveDecimalWritable) expected;
                    if (!value.equals(expectedWritable.getHiveDecimal())) {
                        DecimalTypeInfo decimalTypeInfo = (DecimalTypeInfo) primitiveTypeInfo;
                        int precision = decimalTypeInfo.getPrecision();
                        int scale = decimalTypeInfo.getScale();
                        TestCase.fail("Decimal field mismatch (expected " + expectedWritable.getHiveDecimal() + " found " + value.toString() + ") precision " + precision + ", scale " + scale);
                    }
                }
                break;
            case TIMESTAMP:
                {
                    Timestamp value = deserializeRead.currentTimestampWritable.getTimestamp();
                    TimestampWritable expectedWritable = (TimestampWritable) expected;
                    if (!value.equals(expectedWritable.getTimestamp())) {
                        TestCase.fail("Timestamp field mismatch (expected " + expectedWritable.getTimestamp() + " found " + value.toString() + ")");
                    }
                }
                break;
            case INTERVAL_YEAR_MONTH:
                {
                    HiveIntervalYearMonth value = deserializeRead.currentHiveIntervalYearMonthWritable.getHiveIntervalYearMonth();
                    HiveIntervalYearMonthWritable expectedWritable = (HiveIntervalYearMonthWritable) expected;
                    HiveIntervalYearMonth expectedValue = expectedWritable.getHiveIntervalYearMonth();
                    if (!value.equals(expectedValue)) {
                        TestCase.fail("HiveIntervalYearMonth field mismatch (expected " + expectedValue + " found " + value.toString() + ")");
                    }
                }
                break;
            case INTERVAL_DAY_TIME:
                {
                    HiveIntervalDayTime value = deserializeRead.currentHiveIntervalDayTimeWritable.getHiveIntervalDayTime();
                    HiveIntervalDayTimeWritable expectedWritable = (HiveIntervalDayTimeWritable) expected;
                    HiveIntervalDayTime expectedValue = expectedWritable.getHiveIntervalDayTime();
                    if (!value.equals(expectedValue)) {
                        TestCase.fail("HiveIntervalDayTime field mismatch (expected " + expectedValue + " found " + value.toString() + ")");
                    }
                }
                break;
            default:
                throw new HiveException("Unexpected primitive category " + primitiveCategory);
        }
    }
    // Every column has been consumed, so the reader must be exactly at the end of the row.
    TestCase.assertTrue(deserializeRead.isEndOfInputReached());
}
Also used : HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) HiveChar(org.apache.hadoop.hive.common.type.HiveChar) TimestampWritable(org.apache.hadoop.hive.serde2.io.TimestampWritable) DoubleWritable(org.apache.hadoop.hive.serde2.io.DoubleWritable) ShortWritable(org.apache.hadoop.hive.serde2.io.ShortWritable) Timestamp(java.sql.Timestamp) PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo) HiveDecimal(org.apache.hadoop.hive.common.type.HiveDecimal) LongWritable(org.apache.hadoop.io.LongWritable) PrimitiveCategory(org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory) ByteWritable(org.apache.hadoop.hive.serde2.io.ByteWritable) IntWritable(org.apache.hadoop.io.IntWritable) HiveIntervalDayTime(org.apache.hadoop.hive.common.type.HiveIntervalDayTime) DateWritable(org.apache.hadoop.hive.serde2.io.DateWritable) HiveDecimalWritable(org.apache.hadoop.hive.serde2.io.HiveDecimalWritable) HiveCharWritable(org.apache.hadoop.hive.serde2.io.HiveCharWritable) HiveVarcharWritable(org.apache.hadoop.hive.serde2.io.HiveVarcharWritable) Text(org.apache.hadoop.io.Text) BytesWritable(org.apache.hadoop.io.BytesWritable) HiveIntervalDayTimeWritable(org.apache.hadoop.hive.serde2.io.HiveIntervalDayTimeWritable) HiveVarchar(org.apache.hadoop.hive.common.type.HiveVarchar) HiveIntervalYearMonthWritable(org.apache.hadoop.hive.serde2.io.HiveIntervalYearMonthWritable) DecimalTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo) FloatWritable(org.apache.hadoop.io.FloatWritable) HiveIntervalYearMonth(org.apache.hadoop.hive.common.type.HiveIntervalYearMonth) BooleanWritable(org.apache.hadoop.io.BooleanWritable)

Example 2 with DeserializeRead

use of org.apache.hadoop.hive.serde2.fast.DeserializeRead in project hive by apache.

In class TestVectorSerDeRow, method testVectorSerializeRow.

/**
 * Round-trips 100000 random rows through a vectorized row batch: rows are assigned into the
 * batch, and each time the batch fills up (plus once for any leftover rows) it is handed to
 * {@code serializeBatch} for serialization and verification.
 *
 * @param r random generator used to initialize the row source
 * @param serializationType selects the matching SerializeWrite / DeserializeRead pair
 * @throws HiveException propagated from the vectorization and verification calls
 * @throws IOException propagated from serialization
 * @throws SerDeException declared for the SerDe parameter setup
 */
void testVectorSerializeRow(Random r, SerializationType serializationType) throws HiveException, IOException, SerDeException {
    VectorRandomRowSource source = new VectorRandomRowSource();
    source.init(r);

    // The batch context is initialized with an empty list of scratch type names.
    VectorizedRowBatchCtx batchContext = new VectorizedRowBatchCtx();
    batchContext.init(source.rowStructObjectInspector(), new String[0]);
    VectorizedRowBatch batch = batchContext.createVectorizedRowBatch();

    VectorAssignRow rowAssigner = new VectorAssignRow();
    rowAssigner.init(source.typeNames());

    int columnCount = source.typeNames().size();

    // Choose the reader/writer pair that belongs to the requested wire format.
    DeserializeRead reader;
    SerializeWrite writer;
    switch (serializationType) {
        case BINARY_SORTABLE:
            reader = new BinarySortableDeserializeRead(source.primitiveTypeInfos(), /* useExternalBuffer */ false);
            writer = new BinarySortableSerializeWrite(columnCount);
            break;
        case LAZY_BINARY:
            reader = new LazyBinaryDeserializeRead(source.primitiveTypeInfos(), /* useExternalBuffer */ false);
            writer = new LazyBinarySerializeWrite(columnCount);
            break;
        case LAZY_SIMPLE:
            {
                StructObjectInspector rowInspector = source.rowStructObjectInspector();
                LazySerDeParameters serDeParameters = getSerDeParams(rowInspector);
                byte fieldSeparator = (byte) '\t';
                reader = new LazySimpleDeserializeRead(source.primitiveTypeInfos(), /* useExternalBuffer */ false, fieldSeparator, serDeParameters);
                writer = new LazySimpleSerializeWrite(columnCount, fieldSeparator, serDeParameters);
            }
            break;
        default:
            throw new Error("Unknown serialization type " + serializationType);
    }

    VectorSerializeRow rowSerializer = new VectorSerializeRow(writer);
    rowSerializer.init(source.typeNames());

    Object[][] allRows = source.randomRows(100000);
    // Index into allRows of the first row held by the batch currently being filled.
    int batchStart = 0;
    // Number of rows assigned so far, i.e. the index of the next row to assign.
    int assigned = 0;
    for (Object[] currentRow : allRows) {
        rowAssigner.assignRow(batch, batch.size, currentRow);
        batch.size++;
        assigned++;
        if (batch.size != batch.DEFAULT_SIZE) {
            continue;
        }
        // Batch is full: verify it, then start the next one at the following row.
        serializeBatch(batch, rowSerializer, reader, source, allRows, batchStart);
        batchStart = assigned;
        batch.reset();
    }

    // Verify whatever is left in a partially filled final batch.
    if (batch.size > 0) {
        serializeBatch(batch, rowSerializer, reader, source, allRows, batchStart);
    }
}
Also used : LazySerDeParameters(org.apache.hadoop.hive.serde2.lazy.LazySerDeParameters) DeserializeRead(org.apache.hadoop.hive.serde2.fast.DeserializeRead) BinarySortableDeserializeRead(org.apache.hadoop.hive.serde2.binarysortable.fast.BinarySortableDeserializeRead) LazySimpleDeserializeRead(org.apache.hadoop.hive.serde2.lazy.fast.LazySimpleDeserializeRead) LazyBinaryDeserializeRead(org.apache.hadoop.hive.serde2.lazybinary.fast.LazyBinaryDeserializeRead) BinarySortableDeserializeRead(org.apache.hadoop.hive.serde2.binarysortable.fast.BinarySortableDeserializeRead) LazyBinarySerializeWrite(org.apache.hadoop.hive.serde2.lazybinary.fast.LazyBinarySerializeWrite) LazySimpleDeserializeRead(org.apache.hadoop.hive.serde2.lazy.fast.LazySimpleDeserializeRead) BinarySortableSerializeWrite(org.apache.hadoop.hive.serde2.binarysortable.fast.BinarySortableSerializeWrite) LazySimpleSerializeWrite(org.apache.hadoop.hive.serde2.lazy.fast.LazySimpleSerializeWrite) LazyBinaryDeserializeRead(org.apache.hadoop.hive.serde2.lazybinary.fast.LazyBinaryDeserializeRead) SerializeWrite(org.apache.hadoop.hive.serde2.fast.SerializeWrite) LazyBinarySerializeWrite(org.apache.hadoop.hive.serde2.lazybinary.fast.LazyBinarySerializeWrite) BinarySortableSerializeWrite(org.apache.hadoop.hive.serde2.binarysortable.fast.BinarySortableSerializeWrite) LazySimpleSerializeWrite(org.apache.hadoop.hive.serde2.lazy.fast.LazySimpleSerializeWrite) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector)

Example 3 with DeserializeRead

use of org.apache.hadoop.hive.serde2.fast.DeserializeRead in project hive by apache.

In class TestVectorSerDeRow, method serializeBatch.

/**
 * Serializes each row of {@code batch} in turn and immediately verifies the produced bytes
 * against the matching expected row.
 *
 * <p>A single {@code Output} buffer is reused for all rows; it is reset before each row is
 * written.
 *
 * @param batch batch whose first {@code batch.size} rows are serialized
 * @param vectorSerializeRow serializer that writes one batch row into the output buffer
 * @param deserializeRead reader passed on to {@code deserializeAndVerify}
 * @param source row source passed on to {@code deserializeAndVerify}
 * @param randomRows all expected rows
 * @param firstRandomRowIndex index in {@code randomRows} of the row stored at batch position 0
 * @throws HiveException propagated from serialization or verification
 * @throws IOException propagated from serialization or verification
 */
void serializeBatch(VectorizedRowBatch batch, VectorSerializeRow vectorSerializeRow, DeserializeRead deserializeRead, VectorRandomRowSource source, Object[][] randomRows, int firstRandomRowIndex) throws HiveException, IOException {
    Output output = new Output();
    for (int i = 0; i < batch.size; i++) {
        output.reset();
        vectorSerializeRow.setOutput(output);
        vectorSerializeRow.serializeWrite(batch, i);
        // Batch position i corresponds to expected row firstRandomRowIndex + i.
        Object[] expectedRow = randomRows[firstRandomRowIndex + i];
        deserializeAndVerify(output, deserializeRead, source, expectedRow);
    }
}
Also used : Output(org.apache.hadoop.hive.serde2.ByteStream.Output)

Example 4 with DeserializeRead

use of org.apache.hadoop.hive.serde2.fast.DeserializeRead in project hive by apache.

In class TestLazyBinaryFast, method testLazyBinaryFast.

/**
 * Cross-checks the fast LazyBinary SerializeWrite / DeserializeRead classes against the regular
 * SerDe, in four passes over {@code rows}:
 * <ol>
 *   <li>serialize each row with {@code LazyBinarySerializeWrite};</li>
 *   <li>read those bytes back with {@code LazyBinaryDeserializeRead} and verify each column;</li>
 *   <li>deserialize those bytes with the SerDe and compare each field with the row;</li>
 *   <li>serialize each row with the SerDe, require byte-for-byte equality with pass 1, then
 *       read the SerDe bytes back with {@code LazyBinaryDeserializeRead}.</li>
 * </ol>
 *
 * @param source not referenced by this method
 * @param rows rows to test; each entry holds one Writable (or null) per column
 * @param serde SerDe used when all columns are written
 * @param rowOI object inspector used with {@code serde}
 * @param serde_fewer SerDe used when {@code doWriteFewerColumns} is true
 * @param writeRowOI object inspector used with {@code serde_fewer}; its field count is the
 *     number of columns written when {@code doWriteFewerColumns} is true
 * @param primitiveTypeInfos type info of every column
 * @param useIncludeColumns when true, a random subset of columns is skipped while reading
 * @param doWriteFewerColumns when true, only the leading {@code writeRowOI} columns are written
 * @param r random generator used to pick the included columns
 * @throws Throwable propagated from the serialization and verification helpers
 */
private void testLazyBinaryFast(SerdeRandomRowSource source, Object[][] rows, AbstractSerDe serde, StructObjectInspector rowOI, AbstractSerDe serde_fewer, StructObjectInspector writeRowOI, PrimitiveTypeInfo[] primitiveTypeInfos, boolean useIncludeColumns, boolean doWriteFewerColumns, Random r) throws Throwable {
    int rowCount = rows.length;
    int columnCount = primitiveTypeInfos.length;
    boolean[] columnsToInclude = null;
    if (useIncludeColumns) {
        columnsToInclude = new boolean[columnCount];
        for (int i = 0; i < columnCount; i++) {
            columnsToInclude[i] = r.nextBoolean();
        }
    }
    int writeColumnCount = columnCount;
    PrimitiveTypeInfo[] writePrimitiveTypeInfos = primitiveTypeInfos;
    if (doWriteFewerColumns) {
        writeColumnCount = writeRowOI.getAllStructFieldRefs().size();
        writePrimitiveTypeInfos = Arrays.copyOf(primitiveTypeInfos, writeColumnCount);
    }
    LazyBinarySerializeWrite lazyBinarySerializeWrite = new LazyBinarySerializeWrite(writeColumnCount);
    // Try to serialize
    BytesWritable[] serializeWriteBytes = new BytesWritable[rowCount];
    for (int i = 0; i < rowCount; i++) {
        Object[] row = rows[i];
        Output output = new Output();
        lazyBinarySerializeWrite.set(output);
        for (int index = 0; index < writeColumnCount; index++) {
            Writable writable = (Writable) row[index];
            VerifyFast.serializeWrite(lazyBinarySerializeWrite, primitiveTypeInfos[index], writable);
        }
        BytesWritable bytesWritable = new BytesWritable();
        bytesWritable.set(output.getData(), 0, output.getLength());
        serializeWriteBytes[i] = bytesWritable;
    }
    // Try to deserialize
    for (int i = 0; i < rowCount; i++) {
        Object[] row = rows[i];
        // Specifying the right type info length tells LazyBinaryDeserializeRead which is the last
        // column.
        LazyBinaryDeserializeRead lazyBinaryDeserializeRead = new LazyBinaryDeserializeRead(writePrimitiveTypeInfos, /* useExternalBuffer */ false);
        BytesWritable bytesWritable = serializeWriteBytes[i];
        lazyBinaryDeserializeRead.set(bytesWritable.getBytes(), 0, bytesWritable.getLength());
        for (int index = 0; index < columnCount; index++) {
            if (useIncludeColumns && !columnsToInclude[index]) {
                lazyBinaryDeserializeRead.skipNextField();
            } else if (index >= writeColumnCount) {
                // Should come back a null.
                VerifyFast.verifyDeserializeRead(lazyBinaryDeserializeRead, primitiveTypeInfos[index], null);
            } else {
                Writable writable = (Writable) row[index];
                VerifyFast.verifyDeserializeRead(lazyBinaryDeserializeRead, primitiveTypeInfos[index], writable);
            }
        }
        // End of input is only asserted when every column was written.
        if (writeColumnCount == columnCount) {
            TestCase.assertTrue(lazyBinaryDeserializeRead.isEndOfInputReached());
        }
    }
    // Try to deserialize using SerDe class our Writable row objects created by SerializeWrite.
    for (int i = 0; i < rowCount; i++) {
        BytesWritable bytesWritable = serializeWriteBytes[i];
        LazyBinaryStruct lazyBinaryStruct;
        if (doWriteFewerColumns) {
            lazyBinaryStruct = (LazyBinaryStruct) serde_fewer.deserialize(bytesWritable);
        } else {
            lazyBinaryStruct = (LazyBinaryStruct) serde.deserialize(bytesWritable);
        }
        Object[] row = rows[i];
        for (int index = 0; index < writeColumnCount; index++) {
            Writable writable = (Writable) row[index];
            Object object = lazyBinaryStruct.getField(index);
            if (writable == null || object == null) {
                // At least one side is null: they only match if both are.
                if (writable != null || object != null) {
                    fail("SerDe deserialized NULL column mismatch");
                }
            } else {
                if (!object.equals(writable)) {
                    fail("SerDe deserialized value does not match");
                }
            }
        }
    }
    // One Writable per row.
    BytesWritable[] serdeBytes = new BytesWritable[rowCount];
    // Serialize using the SerDe, then below deserialize using DeserializeRead.
    Object[] serdeRow = new Object[writeColumnCount];
    for (int i = 0; i < rowCount; i++) {
        Object[] row = rows[i];
        // LazyBinary seems to work better with an row object array instead of a Java object...
        System.arraycopy(row, 0, serdeRow, 0, writeColumnCount);
        BytesWritable serialized;
        if (doWriteFewerColumns) {
            serialized = (BytesWritable) serde_fewer.serialize(serdeRow, writeRowOI);
        } else {
            serialized = (BytesWritable) serde.serialize(serdeRow, rowOI);
        }
        // Copy out exactly getLength() bytes so the new BytesWritable does not alias the
        // buffer returned by the SerDe.
        BytesWritable bytesWritable = new BytesWritable(Arrays.copyOfRange(serialized.getBytes(), 0, serialized.getLength()));
        byte[] bytes1 = bytesWritable.getBytes();
        BytesWritable lazySerializedWriteBytes = serializeWriteBytes[i];
        byte[] bytes2 = Arrays.copyOfRange(lazySerializedWriteBytes.getBytes(), 0, lazySerializedWriteBytes.getLength());
        if (bytes1.length != bytes2.length) {
            fail("SerializeWrite length " + bytes2.length + " and " + "SerDe serialization length " + bytes1.length + " do not match (" + Arrays.toString(primitiveTypeInfos) + ")");
        }
        if (!Arrays.equals(bytes1, bytes2)) {
            fail("SerializeWrite and SerDe serialization does not match (" + Arrays.toString(primitiveTypeInfos) + ")");
        }
        serdeBytes[i] = bytesWritable;
    }
    // Try to deserialize using DeserializeRead our Writable row objects created by SerDe.
    for (int i = 0; i < rowCount; i++) {
        Object[] row = rows[i];
        // When doWriteFewerColumns, try to read more fields than exist in buffer.
        LazyBinaryDeserializeRead lazyBinaryDeserializeRead = new LazyBinaryDeserializeRead(primitiveTypeInfos, /* useExternalBuffer */ false);
        BytesWritable bytesWritable = serdeBytes[i];
        lazyBinaryDeserializeRead.set(bytesWritable.getBytes(), 0, bytesWritable.getLength());
        for (int index = 0; index < columnCount; index++) {
            if (useIncludeColumns && !columnsToInclude[index]) {
                lazyBinaryDeserializeRead.skipNextField();
            } else if (index >= writeColumnCount) {
                // Should come back a null.
                VerifyFast.verifyDeserializeRead(lazyBinaryDeserializeRead, primitiveTypeInfos[index], null);
            } else {
                Writable writable = (Writable) row[index];
                VerifyFast.verifyDeserializeRead(lazyBinaryDeserializeRead, primitiveTypeInfos[index], writable);
            }
        }
        if (writeColumnCount == columnCount) {
            TestCase.assertTrue(lazyBinaryDeserializeRead.isEndOfInputReached());
        }
    }
}
Also used : LazyBinarySerializeWrite(org.apache.hadoop.hive.serde2.lazybinary.fast.LazyBinarySerializeWrite) Writable(org.apache.hadoop.io.Writable) BytesWritable(org.apache.hadoop.io.BytesWritable) BytesWritable(org.apache.hadoop.io.BytesWritable) PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo) Output(org.apache.hadoop.hive.serde2.ByteStream.Output) LazyBinaryDeserializeRead(org.apache.hadoop.hive.serde2.lazybinary.fast.LazyBinaryDeserializeRead)

Example 5 with DeserializeRead

use of org.apache.hadoop.hive.serde2.fast.DeserializeRead in project hive by apache.

In class TestLazySimpleFast, method testLazySimpleFast.

/**
 * Cross-checks the fast LazySimple SerializeWrite / DeserializeRead classes against
 * {@code LazySimpleSerDe}, in four passes over {@code rows}:
 * <ol>
 *   <li>serialize each row with {@code LazySimpleSerializeWrite};</li>
 *   <li>read those bytes back with {@code LazySimpleDeserializeRead} and verify each column;</li>
 *   <li>deserialize those bytes with the SerDe and compare each field with the row;</li>
 *   <li>serialize each row with the SerDe, require byte-for-byte equality with pass 1, then
 *       read the SerDe bytes back with {@code LazySimpleDeserializeRead}.</li>
 * </ol>
 *
 * <p>Serialization here always writes all {@code columnCount} columns;
 * {@code doWriteFewerColumns} only changes the type info array handed to the readers and the
 * column index from which a null is expected.
 *
 * @param source not referenced by this method
 * @param rows rows to test; each entry holds one Writable (or null) per column
 * @param serde SerDe used for the SerDe-side serialization and deserialization
 * @param rowOI object inspector used with {@code serde}
 * @param serde_fewer not referenced by this method
 * @param writeRowOI its field count is used as the write column count when
 *     {@code doWriteFewerColumns} is true
 * @param separator field separator byte
 * @param serdeParams SerDe parameters given to the fast reader and writer
 * @param serdeParams_fewer not referenced by this method
 * @param primitiveTypeInfos type info of every column
 * @param useIncludeColumns when true, a random subset of columns is skipped while reading
 * @param doWriteFewerColumns see above
 * @param r random generator used to pick the included columns
 * @throws Throwable propagated from the serialization and verification helpers
 */
private void testLazySimpleFast(SerdeRandomRowSource source, Object[][] rows, LazySimpleSerDe serde, StructObjectInspector rowOI, LazySimpleSerDe serde_fewer, StructObjectInspector writeRowOI, byte separator, LazySerDeParameters serdeParams, LazySerDeParameters serdeParams_fewer, PrimitiveTypeInfo[] primitiveTypeInfos, boolean useIncludeColumns, boolean doWriteFewerColumns, Random r) throws Throwable {
    int rowCount = rows.length;
    int columnCount = primitiveTypeInfos.length;
    boolean[] columnsToInclude = null;
    if (useIncludeColumns) {
        columnsToInclude = new boolean[columnCount];
        for (int i = 0; i < columnCount; i++) {
            columnsToInclude[i] = r.nextBoolean();
        }
    }
    int writeColumnCount = columnCount;
    PrimitiveTypeInfo[] writePrimitiveTypeInfos = primitiveTypeInfos;
    if (doWriteFewerColumns) {
        writeColumnCount = writeRowOI.getAllStructFieldRefs().size();
        writePrimitiveTypeInfos = Arrays.copyOf(primitiveTypeInfos, writeColumnCount);
    }
    // Try to serialize
    BytesWritable[] serializeWriteBytes = new BytesWritable[rowCount];
    for (int i = 0; i < rowCount; i++) {
        Object[] row = rows[i];
        Output output = new Output();
        LazySimpleSerializeWrite lazySimpleSerializeWrite = new LazySimpleSerializeWrite(columnCount, separator, serdeParams);
        lazySimpleSerializeWrite.set(output);
        for (int index = 0; index < columnCount; index++) {
            Writable writable = (Writable) row[index];
            VerifyFast.serializeWrite(lazySimpleSerializeWrite, primitiveTypeInfos[index], writable);
        }
        BytesWritable bytesWritable = new BytesWritable();
        bytesWritable.set(output.getData(), 0, output.getLength());
        serializeWriteBytes[i] = bytesWritable;
    }
    // Try to deserialize
    for (int i = 0; i < rowCount; i++) {
        Object[] row = rows[i];
        LazySimpleDeserializeRead lazySimpleDeserializeRead = new LazySimpleDeserializeRead(writePrimitiveTypeInfos, /* useExternalBuffer */ false, separator, serdeParams);
        BytesWritable bytesWritable = serializeWriteBytes[i];
        byte[] bytes = bytesWritable.getBytes();
        int length = bytesWritable.getLength();
        lazySimpleDeserializeRead.set(bytes, 0, length);
        for (int index = 0; index < columnCount; index++) {
            if (useIncludeColumns && !columnsToInclude[index]) {
                lazySimpleDeserializeRead.skipNextField();
            } else if (index >= writeColumnCount) {
                // Should come back a null.
                VerifyFast.verifyDeserializeRead(lazySimpleDeserializeRead, primitiveTypeInfos[index], null);
            } else {
                Writable writable = (Writable) row[index];
                VerifyFast.verifyDeserializeRead(lazySimpleDeserializeRead, primitiveTypeInfos[index], writable);
            }
        }
        // End of input is only asserted when the reader was given every column.
        if (writeColumnCount == columnCount) {
            TestCase.assertTrue(lazySimpleDeserializeRead.isEndOfInputReached());
        }
    }
    // Try to deserialize using SerDe class our Writable row objects created by SerializeWrite.
    for (int i = 0; i < rowCount; i++) {
        BytesWritable bytesWritable = serializeWriteBytes[i];
        LazyStruct lazySimpleStruct = (LazyStruct) serde.deserialize(bytesWritable);
        Object[] row = rows[i];
        for (int index = 0; index < columnCount; index++) {
            Writable writable = (Writable) row[index];
            LazyPrimitive lazyPrimitive = (LazyPrimitive) lazySimpleStruct.getField(index);
            Object object;
            if (lazyPrimitive != null) {
                object = lazyPrimitive.getWritableObject();
            } else {
                object = null;
            }
            if (writable == null || object == null) {
                // At least one side is null: they only match if both are.
                if (writable != null || object != null) {
                    fail("SerDe deserialized NULL column mismatch");
                }
            } else {
                if (!object.equals(writable)) {
                    fail("SerDe deserialized value does not match");
                }
            }
        }
    }
    // One Writable per row.
    byte[][] serdeBytes = new byte[rowCount][];
    // Serialize using the SerDe, then below deserialize using DeserializeRead.
    Object[] serdeRow = new Object[columnCount];
    for (int i = 0; i < rowCount; i++) {
        Object[] row = rows[i];
        // LazySimple seems to work better with an row object array instead of a Java object...
        System.arraycopy(row, 0, serdeRow, 0, columnCount);
        Text serialized = (Text) serde.serialize(serdeRow, rowOI);
        // Compare only the valid prefix of each backing array.
        byte[] bytes1 = Arrays.copyOfRange(serialized.getBytes(), 0, serialized.getLength());
        byte[] bytes2 = Arrays.copyOfRange(serializeWriteBytes[i].getBytes(), 0, serializeWriteBytes[i].getLength());
        if (!Arrays.equals(bytes1, bytes2)) {
            fail("SerializeWrite and SerDe serialization does not match");
        }
        serdeBytes[i] = copyBytes(serialized);
    }
    // Try to deserialize using DeserializeRead our Writable row objects created by SerDe.
    for (int i = 0; i < rowCount; i++) {
        Object[] row = rows[i];
        LazySimpleDeserializeRead lazySimpleDeserializeRead = new LazySimpleDeserializeRead(writePrimitiveTypeInfos, /* useExternalBuffer */ false, separator, serdeParams);
        byte[] bytes = serdeBytes[i];
        lazySimpleDeserializeRead.set(bytes, 0, bytes.length);
        for (int index = 0; index < columnCount; index++) {
            if (useIncludeColumns && !columnsToInclude[index]) {
                lazySimpleDeserializeRead.skipNextField();
            } else if (index >= writeColumnCount) {
                // Should come back a null.
                VerifyFast.verifyDeserializeRead(lazySimpleDeserializeRead, primitiveTypeInfos[index], null);
            } else {
                Writable writable = (Writable) row[index];
                VerifyFast.verifyDeserializeRead(lazySimpleDeserializeRead, primitiveTypeInfos[index], writable);
            }
        }
        if (writeColumnCount == columnCount) {
            TestCase.assertTrue(lazySimpleDeserializeRead.isEndOfInputReached());
        }
    }
}
Also used : Writable(org.apache.hadoop.io.Writable) BytesWritable(org.apache.hadoop.io.BytesWritable) LazySimpleDeserializeRead(org.apache.hadoop.hive.serde2.lazy.fast.LazySimpleDeserializeRead) BytesWritable(org.apache.hadoop.io.BytesWritable) Text(org.apache.hadoop.io.Text) LazySimpleSerializeWrite(org.apache.hadoop.hive.serde2.lazy.fast.LazySimpleSerializeWrite) PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo) Output(org.apache.hadoop.hive.serde2.ByteStream.Output)

Aggregations

BytesWritable (org.apache.hadoop.io.BytesWritable)7 Output (org.apache.hadoop.hive.serde2.ByteStream.Output)5 ByteWritable (org.apache.hadoop.hive.serde2.io.ByteWritable)4 DoubleWritable (org.apache.hadoop.hive.serde2.io.DoubleWritable)4 ShortWritable (org.apache.hadoop.hive.serde2.io.ShortWritable)4 BooleanWritable (org.apache.hadoop.io.BooleanWritable)4 FloatWritable (org.apache.hadoop.io.FloatWritable)4 IntWritable (org.apache.hadoop.io.IntWritable)4 LongWritable (org.apache.hadoop.io.LongWritable)4 Writable (org.apache.hadoop.io.Writable)4 Timestamp (java.sql.Timestamp)3 HiveChar (org.apache.hadoop.hive.common.type.HiveChar)3 HiveDecimal (org.apache.hadoop.hive.common.type.HiveDecimal)3 HiveIntervalDayTime (org.apache.hadoop.hive.common.type.HiveIntervalDayTime)3 HiveIntervalYearMonth (org.apache.hadoop.hive.common.type.HiveIntervalYearMonth)3 HiveVarchar (org.apache.hadoop.hive.common.type.HiveVarchar)3 HiveException (org.apache.hadoop.hive.ql.metadata.HiveException)3 BinarySortableDeserializeRead (org.apache.hadoop.hive.serde2.binarysortable.fast.BinarySortableDeserializeRead)3 BinarySortableSerializeWrite (org.apache.hadoop.hive.serde2.binarysortable.fast.BinarySortableSerializeWrite)3 LazySimpleDeserializeRead (org.apache.hadoop.hive.serde2.lazy.fast.LazySimpleDeserializeRead)3