Search in sources :

Example 6 with SerdeRandomRowSource

use of org.apache.hadoop.hive.serde2.SerdeRandomRowSource in project hive by apache.

the class TestBinarySortableFast method testBinarySortableFastCase.

/**
 * Runs one randomized BinarySortable "fast" serialization case, first with every column
 * ascending and then with every column descending.
 *
 * <p>For each direction {@code testBinarySortableFast} is invoked with and without
 * include-columns; when this case decided to write fewer columns than the row has, the same
 * two variants are repeated with {@code doWriteFewerColumns == true}.
 *
 * @param caseNum case number supplied by the caller (not read by this method)
 * @param doNonRandomFill when true, the random rows are post-processed by
 *        {@code MyTestClass.nonRandomRowFill}
 * @param r random source; used to build the rows, to decide whether fewer columns are
 *        written, and passed on to {@code testBinarySortableFast}
 * @param supportedTypes forwarded to {@code SerdeRandomRowSource.init}
 * @param depth forwarded to {@code SerdeRandomRowSource.init}
 * @throws Throwable anything raised by the row source, the SerDes or the verification
 */
private void testBinarySortableFastCase(int caseNum, boolean doNonRandomFill, Random r, SerdeRandomRowSource.SupportedTypes supportedTypes, int depth) throws Throwable {
    SerdeRandomRowSource source = new SerdeRandomRowSource();
    // NOTE(review): the original carried "UNDONE: Until Fast BinarySortable supports complex
    // types -- disable." here, but nothing is visibly disabled in this method -- confirm
    // whether that marker is stale.
    source.init(r, supportedTypes, depth);
    int rowCount = 1000;
    Object[][] rows = source.randomRows(rowCount);
    if (doNonRandomFill) {
        MyTestClass.nonRandomRowFill(rows, source.primitiveCategories());
    }
    // We need to operate on sorted data to fully test BinarySortable.
    source.sort(rows);
    StructObjectInspector rowStructObjectInspector = source.rowStructObjectInspector();
    TypeInfo[] typeInfos = source.typeInfos();
    int columnCount = typeInfos.length;

    // Randomly decide whether to write only a leading subset of the columns. Drawing the full
    // column count means "not fewer", so the flag is cleared in that case.
    int writeColumnCount = columnCount;
    StructObjectInspector writeRowStructObjectInspector = rowStructObjectInspector;
    boolean doWriteFewerColumns = r.nextBoolean();
    if (doWriteFewerColumns) {
        writeColumnCount = 1 + r.nextInt(columnCount);
        if (writeColumnCount == columnCount) {
            doWriteFewerColumns = false;
        } else {
            writeRowStructObjectInspector = source.partialRowStructObjectInspector(writeColumnCount);
        }
    }

    String fieldNames = ObjectInspectorUtils.getFieldNames(rowStructObjectInspector);
    String fieldTypes = ObjectInspectorUtils.getFieldTypes(rowStructObjectInspector);
    // The partial schema is the same for both sort directions, so derive it once.
    String partialFieldNames = null;
    String partialFieldTypes = null;
    if (doWriteFewerColumns) {
        partialFieldNames = ObjectInspectorUtils.getFieldNames(writeRowStructObjectInspector);
        partialFieldTypes = ObjectInspectorUtils.getFieldTypes(writeRowStructObjectInspector);
    }

    String order = StringUtils.leftPad("", columnCount, '+');
    String nullOrder = StringUtils.leftPad("", columnCount, 'a');
    AbstractSerDe serde_ascending = TestBinarySortableSerDe.getSerDe(fieldNames, fieldTypes, order, nullOrder);
    AbstractSerDe serde_ascending_fewer = null;
    if (doWriteFewerColumns) {
        serde_ascending_fewer = TestBinarySortableSerDe.getSerDe(partialFieldNames, partialFieldTypes, order, nullOrder);
    }

    order = StringUtils.leftPad("", columnCount, '-');
    nullOrder = StringUtils.leftPad("", columnCount, 'z');
    AbstractSerDe serde_descending = TestBinarySortableSerDe.getSerDe(fieldNames, fieldTypes, order, nullOrder);
    AbstractSerDe serde_descending_fewer = null;
    if (doWriteFewerColumns) {
        serde_descending_fewer = TestBinarySortableSerDe.getSerDe(partialFieldNames, partialFieldTypes, order, nullOrder);
    }

    boolean[] columnSortOrderIsDesc = new boolean[columnCount];
    Arrays.fill(columnSortOrderIsDesc, false);
    byte[] columnNullMarker = new byte[columnCount];
    Arrays.fill(columnNullMarker, BinarySortableSerDe.ZERO);
    byte[] columnNotNullMarker = new byte[columnCount];
    Arrays.fill(columnNotNullMarker, BinarySortableSerDe.ONE);

    // Variant order matters because every call consumes values from r:
    // useIncludeColumns = false, then true.
    boolean[] includeColumnsVariants = new boolean[] { false, true };

    /*
     * Ascending.
     */
    for (boolean useIncludeColumns : includeColumnsVariants) {
        testBinarySortableFast(source, rows, columnSortOrderIsDesc, columnNullMarker, columnNotNullMarker,
            serde_ascending, rowStructObjectInspector, serde_ascending_fewer, writeRowStructObjectInspector,
            /* ascending */ true, typeInfos, useIncludeColumns, /* doWriteFewerColumns */ false, r);
    }
    if (doWriteFewerColumns) {
        for (boolean useIncludeColumns : includeColumnsVariants) {
            testBinarySortableFast(source, rows, columnSortOrderIsDesc, columnNullMarker, columnNotNullMarker,
                serde_ascending, rowStructObjectInspector, serde_ascending_fewer, writeRowStructObjectInspector,
                /* ascending */ true, typeInfos, useIncludeColumns, /* doWriteFewerColumns */ true, r);
        }
    }

    /*
     * Descending.
     */
    Arrays.fill(columnSortOrderIsDesc, true);
    for (boolean useIncludeColumns : includeColumnsVariants) {
        // Pass the descending "fewer" SerDe here too, so the SerDe pair always matches the
        // sort direction (the original handed the ascending one to these two calls).
        testBinarySortableFast(source, rows, columnSortOrderIsDesc, columnNullMarker, columnNotNullMarker,
            serde_descending, rowStructObjectInspector, serde_descending_fewer, writeRowStructObjectInspector,
            /* ascending */ false, typeInfos, useIncludeColumns, /* doWriteFewerColumns */ false, r);
    }
    if (doWriteFewerColumns) {
        for (boolean useIncludeColumns : includeColumnsVariants) {
            testBinarySortableFast(source, rows, columnSortOrderIsDesc, columnNullMarker, columnNotNullMarker,
                serde_descending, rowStructObjectInspector, serde_descending_fewer, writeRowStructObjectInspector,
                /* ascending */ false, typeInfos, useIncludeColumns, /* doWriteFewerColumns */ true, r);
        }
    }
}
Also used : SerdeRandomRowSource(org.apache.hadoop.hive.serde2.SerdeRandomRowSource) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) AbstractSerDe(org.apache.hadoop.hive.serde2.AbstractSerDe) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector)

Example 7 with SerdeRandomRowSource

use of org.apache.hadoop.hive.serde2.SerdeRandomRowSource in project hive by apache.

the class TestLazyBinaryFast method testLazyBinaryFast.

/**
 * Round-trips every row through LazyBinary SerializeWrite / DeserializeRead and through the
 * regular SerDe, and checks that both paths agree.
 *
 * <p>Steps: (1) serialize with {@code LazyBinarySerializeWrite}; (2) read those bytes back with
 * {@code LazyBinaryDeserializeRead}; (3) read those bytes back with the SerDe; (4) serialize
 * with the SerDe and require byte-identical output; (5) read the SerDe bytes back with
 * {@code LazyBinaryDeserializeRead}.
 *
 * @param source row source of the test case (not read by this method)
 * @param rows rows to serialize
 * @param serde SerDe covering all columns
 * @param rowOI object inspector for all columns
 * @param serde_fewer SerDe covering only the written columns; used when
 *        {@code doWriteFewerColumns} is true
 * @param writeRowOI object inspector for the written columns
 * @param typeInfos type of every column
 * @param useIncludeColumns when true, a random subset of columns is skipped while reading
 * @param doWriteFewerColumns when true, only the columns described by {@code writeRowOI}
 *        are written
 * @param r random source used to pick the included columns
 * @throws Throwable anything raised by serialization, deserialization or verification
 */
private void testLazyBinaryFast(SerdeRandomRowSource source, Object[][] rows, AbstractSerDe serde, StructObjectInspector rowOI, AbstractSerDe serde_fewer, StructObjectInspector writeRowOI, TypeInfo[] typeInfos, boolean useIncludeColumns, boolean doWriteFewerColumns, Random r) throws Throwable {
    final int numRows = rows.length;
    final int numColumns = typeInfos.length;

    // Random include mask; stays null when every column is read.
    boolean[] includeMask = null;
    if (useIncludeColumns) {
        includeMask = new boolean[numColumns];
        for (int c = 0; c < numColumns; c++) {
            includeMask[c] = r.nextBoolean();
        }
    }

    final int numWritten = doWriteFewerColumns ? writeRowOI.getAllStructFieldRefs().size() : numColumns;
    final TypeInfo[] writtenTypeInfos = doWriteFewerColumns ? Arrays.copyOf(typeInfos, numWritten) : typeInfos;

    // (1) Serialize every row with SerializeWrite; one Writable per row.
    LazyBinarySerializeWrite serializeWrite = new LazyBinarySerializeWrite(numWritten);
    BytesWritable[] serializeWriteBytes = new BytesWritable[numRows];
    for (int rowNum = 0; rowNum < numRows; rowNum++) {
        Object[] currentRow = rows[rowNum];
        Output out = new Output();
        serializeWrite.set(out);
        for (int c = 0; c < numWritten; c++) {
            VerifyFast.serializeWrite(serializeWrite, typeInfos[c], currentRow[c]);
        }
        BytesWritable copy = new BytesWritable();
        copy.set(out.getData(), 0, out.getLength());
        serializeWriteBytes[rowNum] = copy;
    }

    // (2) Read the SerializeWrite bytes back with DeserializeRead.
    for (int rowNum = 0; rowNum < numRows; rowNum++) {
        Object[] currentRow = rows[rowNum];
        // Specifying the right type info length tells LazyBinaryDeserializeRead which is the
        // last column.
        LazyBinaryDeserializeRead reader = new LazyBinaryDeserializeRead(writtenTypeInfos, /* useExternalBuffer */ false);
        BytesWritable written = serializeWriteBytes[rowNum];
        reader.set(written.getBytes(), 0, written.getLength());
        for (int c = 0; c < numColumns; c++) {
            if (useIncludeColumns && !includeMask[c]) {
                reader.skipNextField();
                continue;
            }
            if (c < numWritten) {
                verifyRead(reader, typeInfos[c], currentRow[c]);
            } else {
                // Should come back a null.
                VerifyFast.verifyDeserializeRead(reader, typeInfos[c], null);
            }
        }
        if (numWritten == numColumns) {
            assertTrue(reader.isEndOfInputReached());
        }
    }

    // (3) Read the SerializeWrite bytes back with the SerDe class.
    final AbstractSerDe readSerDe = doWriteFewerColumns ? serde_fewer : serde;
    for (int rowNum = 0; rowNum < numRows; rowNum++) {
        LazyBinaryStruct struct = (LazyBinaryStruct) readSerDe.deserialize(serializeWriteBytes[rowNum]);
        Object[] currentRow = rows[rowNum];
        for (int c = 0; c < numWritten; c++) {
            Object expected = currentRow[c];
            Object actual = struct.getField(c);
            boolean expectedIsNull = expected == null;
            boolean actualIsNull = actual == null;
            if (expectedIsNull || actualIsNull) {
                if (expectedIsNull != actualIsNull) {
                    fail("SerDe deserialized NULL column mismatch");
                }
            } else if (!VerifyLazy.lazyCompare(typeInfos[c], actual, expected)) {
                fail("SerDe deserialized value does not match");
            }
        }
    }

    // (4) Serialize with the SerDe and require the same bytes SerializeWrite produced.
    BytesWritable[] serdeBytes = new BytesWritable[numRows];
    // LazyBinary seems to work better with a row object array instead of a Java object...
    Object[] serdeRow = new Object[numWritten];
    for (int rowNum = 0; rowNum < numRows; rowNum++) {
        System.arraycopy(rows[rowNum], 0, serdeRow, 0, numWritten);
        BytesWritable serialized = (BytesWritable) (doWriteFewerColumns
            ? serde_fewer.serialize(serdeRow, writeRowOI)
            : serde.serialize(serdeRow, rowOI));
        // Copy out exactly the valid bytes so the result can be kept and compared.
        BytesWritable serdeCopy = new BytesWritable(Arrays.copyOfRange(serialized.getBytes(), 0, serialized.getLength()));
        byte[] serdeOutput = serdeCopy.getBytes();
        BytesWritable written = serializeWriteBytes[rowNum];
        byte[] serializeWriteOutput = Arrays.copyOfRange(written.getBytes(), 0, written.getLength());
        if (serdeOutput.length != serializeWriteOutput.length) {
            fail("SerializeWrite length " + serializeWriteOutput.length + " and SerDe serialization length " + serdeOutput.length + " do not match (" + Arrays.toString(typeInfos) + ")");
        }
        if (!Arrays.equals(serdeOutput, serializeWriteOutput)) {
            fail("SerializeWrite and SerDe serialization does not match (" + Arrays.toString(typeInfos) + ")");
        }
        serdeBytes[rowNum] = serdeCopy;
    }

    // (5) Read the SerDe bytes back with DeserializeRead.
    for (int rowNum = 0; rowNum < numRows; rowNum++) {
        Object[] currentRow = rows[rowNum];
        // When doWriteFewerColumns, try to read more fields than exist in buffer.
        LazyBinaryDeserializeRead reader = new LazyBinaryDeserializeRead(typeInfos, /* useExternalBuffer */ false);
        BytesWritable fromSerDe = serdeBytes[rowNum];
        reader.set(fromSerDe.getBytes(), 0, fromSerDe.getLength());
        for (int c = 0; c < numColumns; c++) {
            if (useIncludeColumns && !includeMask[c]) {
                reader.skipNextField();
                continue;
            }
            if (c < numWritten) {
                verifyRead(reader, typeInfos[c], currentRow[c]);
            } else {
                // Should come back a null.
                VerifyFast.verifyDeserializeRead(reader, typeInfos[c], null);
            }
        }
        if (numWritten == numColumns) {
            assertTrue(reader.isEndOfInputReached());
        }
    }
}
Also used : LazyBinarySerializeWrite(org.apache.hadoop.hive.serde2.lazybinary.fast.LazyBinarySerializeWrite) BytesWritable(org.apache.hadoop.io.BytesWritable) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) Output(org.apache.hadoop.hive.serde2.ByteStream.Output) UnionObject(org.apache.hadoop.hive.serde2.objectinspector.UnionObject) LazyBinaryDeserializeRead(org.apache.hadoop.hive.serde2.lazybinary.fast.LazyBinaryDeserializeRead)

Example 8 with SerdeRandomRowSource

use of org.apache.hadoop.hive.serde2.SerdeRandomRowSource in project hive by apache.

the class TestBinarySortableFast method testBinarySortableFast.

/**
 * Cross-checks BinarySortable SerializeWrite / DeserializeRead against the regular SerDe for
 * one sort direction.
 *
 * <p>Steps: (1) serialize each row with {@code BinarySortableSerializeWrite} and check that
 * consecutive serialized rows compare in the expected direction; (2) read those bytes back
 * with {@code BinarySortableDeserializeRead}, then again with the last byte clipped off and
 * expect an {@code EOFException} on the last written field; (3) read those bytes back with
 * the SerDe; (4) serialize with the SerDe and require byte-identical output, reporting a
 * per-field breakdown on mismatch; (5) read the SerDe bytes back with DeserializeRead.
 *
 * @param source row source of the test case (not read by this method)
 * @param rows rows to serialize; the sort-order check in step (1) compares neighbours, so the
 *        caller is expected to pass them already sorted
 * @param columnSortOrderIsDesc per-column descending flags handed to the fast classes
 * @param columnNullMarker per-column null marker bytes for SerializeWrite
 * @param columnNotNullMarker per-column not-null marker bytes for SerializeWrite
 * @param serde SerDe covering all columns
 * @param rowOI object inspector for all columns
 * @param serde_fewer SerDe covering only the written columns; used when
 *        {@code doWriteFewerColumns} is true
 * @param writeRowOI object inspector whose field count gives the number of written columns
 * @param ascending expected direction of the serialized-bytes comparison
 * @param primitiveTypeInfos type of every column
 * @param useIncludeColumns when true, a random subset of columns is skipped while reading
 * @param doWriteFewerColumns when true, only a leading subset of the columns is written
 * @param r random source used to pick the included columns
 * @throws Throwable anything raised by serialization, deserialization or verification
 */
private void testBinarySortableFast(SerdeRandomRowSource source, Object[][] rows, boolean[] columnSortOrderIsDesc, byte[] columnNullMarker, byte[] columnNotNullMarker, AbstractSerDe serde, StructObjectInspector rowOI, AbstractSerDe serde_fewer, StructObjectInspector writeRowOI, boolean ascending, PrimitiveTypeInfo[] primitiveTypeInfos, boolean useIncludeColumns, boolean doWriteFewerColumns, Random r) throws Throwable {
    int rowCount = rows.length;
    int columnCount = primitiveTypeInfos.length;
    // Random include mask; stays null when every column is read.
    boolean[] columnsToInclude = null;
    if (useIncludeColumns) {
        columnsToInclude = new boolean[columnCount];
        for (int i = 0; i < columnCount; i++) {
            columnsToInclude[i] = r.nextBoolean();
        }
    }
    int writeColumnCount = columnCount;
    if (doWriteFewerColumns) {
        writeColumnCount = writeRowOI.getAllStructFieldRefs().size();
    }
    BinarySortableSerializeWrite binarySortableSerializeWrite = new BinarySortableSerializeWrite(columnSortOrderIsDesc, columnNullMarker, columnNotNullMarker);
    // Try to serialize
    // One Writable per row.
    BytesWritable[] serializeWriteBytes = new BytesWritable[rowCount];
    // For each row, the cumulative output length after each written field (diagnostics only).
    int[][] perFieldWriteLengthsArray = new int[rowCount][];
    for (int i = 0; i < rowCount; i++) {
        Object[] row = rows[i];
        Output output = new Output();
        binarySortableSerializeWrite.set(output);
        int[] perFieldWriteLengths = new int[columnCount];
        for (int index = 0; index < writeColumnCount; index++) {
            Writable writable = (Writable) row[index];
            VerifyFast.serializeWrite(binarySortableSerializeWrite, primitiveTypeInfos[index], writable);
            perFieldWriteLengths[index] = output.getLength();
        }
        perFieldWriteLengthsArray[i] = perFieldWriteLengths;
        BytesWritable bytesWritable = new BytesWritable();
        bytesWritable.set(output.getData(), 0, output.getLength());
        serializeWriteBytes[i] = bytesWritable;
        if (i > 0) {
            // The serialized form of neighbouring rows must not compare against the
            // requested direction.
            int compareResult = serializeWriteBytes[i - 1].compareTo(serializeWriteBytes[i]);
            if ((compareResult < 0 && !ascending) || (compareResult > 0 && ascending)) {
                System.out.println("Test failed in " + (ascending ? "ascending" : "descending") + " order with " + (i - 1) + " and " + i);
                System.out.println("serialized data [" + (i - 1) + "] = " + TestBinarySortableSerDe.hexString(serializeWriteBytes[i - 1]));
                System.out.println("serialized data [" + i + "] = " + TestBinarySortableSerDe.hexString(serializeWriteBytes[i]));
                fail("Sort order of serialized " + (i - 1) + " and " + i + " are reversed!");
            }
        }
    }
    // Try to deserialize using DeserializeRead our Writable row objects created by SerializeWrite.
    for (int i = 0; i < rowCount; i++) {
        Object[] row = rows[i];
        BinarySortableDeserializeRead binarySortableDeserializeRead = new BinarySortableDeserializeRead(primitiveTypeInfos, /* useExternalBuffer */
        false, columnSortOrderIsDesc);
        BytesWritable bytesWritable = serializeWriteBytes[i];
        binarySortableDeserializeRead.set(bytesWritable.getBytes(), 0, bytesWritable.getLength());
        for (int index = 0; index < columnCount; index++) {
            if (useIncludeColumns && !columnsToInclude[index]) {
                binarySortableDeserializeRead.skipNextField();
            } else if (index >= writeColumnCount) {
                // Should come back a null.
                VerifyFast.verifyDeserializeRead(binarySortableDeserializeRead, primitiveTypeInfos[index], null);
            } else {
                Writable writable = (Writable) row[index];
                VerifyFast.verifyDeserializeRead(binarySortableDeserializeRead, primitiveTypeInfos[index], writable);
            }
        }
        if (writeColumnCount == columnCount) {
            TestCase.assertTrue(binarySortableDeserializeRead.isEndOfInputReached());
        }
        /*
         * Clip off one byte and expect to get an EOFException on the last written field.
         */
        BinarySortableDeserializeRead binarySortableDeserializeRead2 = new BinarySortableDeserializeRead(primitiveTypeInfos, /* useExternalBuffer */
        false, columnSortOrderIsDesc);
        binarySortableDeserializeRead2.set(bytesWritable.getBytes(), 0, // One fewer byte.
        bytesWritable.getLength() - 1);
        for (int index = 0; index < writeColumnCount; index++) {
            Writable writable = (Writable) row[index];
            if (index == writeColumnCount - 1) {
                // The last written field is always read (never skipped) so the truncation
                // is guaranteed to be hit.
                boolean threw = false;
                try {
                    VerifyFast.verifyDeserializeRead(binarySortableDeserializeRead2, primitiveTypeInfos[index], writable);
                } catch (EOFException e) {
                    // Expected: the buffer ends inside this field.
                    threw = true;
                }
                TestCase.assertTrue(threw);
            } else {
                if (useIncludeColumns && !columnsToInclude[index]) {
                    binarySortableDeserializeRead2.skipNextField();
                } else {
                    VerifyFast.verifyDeserializeRead(binarySortableDeserializeRead2, primitiveTypeInfos[index], writable);
                }
            }
        }
    }
    // Try to deserialize using SerDe class our Writable row objects created by SerializeWrite.
    for (int i = 0; i < rowCount; i++) {
        BytesWritable bytesWritable = serializeWriteBytes[i];
        // Note that regular SerDe doesn't tolerate fewer columns.
        List<Object> deserializedRow;
        if (doWriteFewerColumns) {
            deserializedRow = (List<Object>) serde_fewer.deserialize(bytesWritable);
        } else {
            deserializedRow = (List<Object>) serde.deserialize(bytesWritable);
        }
        Object[] row = rows[i];
        for (int index = 0; index < writeColumnCount; index++) {
            Object expected = row[index];
            Object object = deserializedRow.get(index);
            if (expected == null || object == null) {
                if (expected != null || object != null) {
                    fail("SerDe deserialized NULL column mismatch");
                }
            } else {
                if (!object.equals(expected)) {
                    fail("SerDe deserialized value does not match (expected " + expected.getClass().getName() + " " + expected.toString() + ", actual " + object.getClass().getName() + " " + object.toString() + ")");
                }
            }
        }
    }
    // One Writable per row.
    BytesWritable[] serdeBytes = new BytesWritable[rowCount];
    // Serialize using the SerDe, then below deserialize using DeserializeRead.
    for (int i = 0; i < rowCount; i++) {
        Object[] row = rows[i];
        // Since SerDe reuses memory, we will need to make a copy.
        BytesWritable serialized;
        if (doWriteFewerColumns) {
            serialized = (BytesWritable) serde_fewer.serialize(row, rowOI);
        } else {
            serialized = (BytesWritable) serde.serialize(row, rowOI);
        }
        BytesWritable bytesWritable = new BytesWritable();
        bytesWritable.set(serialized);
        byte[] serDeOutput = Arrays.copyOfRange(bytesWritable.getBytes(), 0, bytesWritable.getLength());
        byte[] serializeWriteExpected = Arrays.copyOfRange(serializeWriteBytes[i].getBytes(), 0, serializeWriteBytes[i].getLength());
        if (!Arrays.equals(serDeOutput, serializeWriteExpected)) {
            int mismatchPos = -1;
            if (serDeOutput.length != serializeWriteExpected.length) {
                // Report the first differing byte within the common prefix (-1 if none).
                int commonLength = Math.min(serDeOutput.length, serializeWriteExpected.length);
                for (int b = 0; b < commonLength; b++) {
                    if (serDeOutput[b] != serializeWriteExpected[b]) {
                        mismatchPos = b;
                        break;
                    }
                }
                fail("Different byte array lengths: serDeOutput.length " + serDeOutput.length + ", serializeWriteExpected.length " + serializeWriteExpected.length + " mismatchPos " + mismatchPos + " perFieldWriteLengths " + Arrays.toString(perFieldWriteLengthsArray[i]));
            }
            // Same length from here on: collect every differing position.
            List<Integer> differentPositions = new ArrayList<Integer>();
            for (int b = 0; b < serDeOutput.length; b++) {
                if (serDeOutput[b] != serializeWriteExpected[b]) {
                    differentPositions.add(b);
                }
            }
            if (differentPositions.size() > 0) {
                // Split both byte arrays at the recorded field boundaries for the message.
                List<String> serializeWriteExpectedFields = new ArrayList<String>();
                List<String> serDeFields = new ArrayList<String>();
                int[] fieldEnds = perFieldWriteLengthsArray[i];
                int f = 0;
                int lastBegin = 0;
                // The f bound keeps the diagnostic itself from throwing and hiding the
                // real failure.
                for (int b = 0; b < serDeOutput.length && f < fieldEnds.length; b++) {
                    int writeLength = fieldEnds[f];
                    if (b + 1 == writeLength) {
                        serializeWriteExpectedFields.add(displayBytes(serializeWriteExpected, lastBegin, writeLength - lastBegin));
                        serDeFields.add(displayBytes(serDeOutput, lastBegin, writeLength - lastBegin));
                        f++;
                        lastBegin = b + 1;
                    }
                }
                fail("SerializeWrite and SerDe serialization does not match at positions " + differentPositions.toString() + "\n(SerializeWrite: " + serializeWriteExpectedFields.toString() + "\nSerDe: " + serDeFields.toString() + "\nperFieldWriteLengths " + Arrays.toString(perFieldWriteLengthsArray[i]) + "\nprimitiveTypeInfos " + Arrays.toString(primitiveTypeInfos) + "\nrow " + Arrays.toString(row));
            }
        }
        serdeBytes[i] = bytesWritable;
    }
    // Try to deserialize using DeserializeRead our Writable row objects created by SerDe.
    for (int i = 0; i < rowCount; i++) {
        Object[] row = rows[i];
        BinarySortableDeserializeRead binarySortableDeserializeRead = new BinarySortableDeserializeRead(primitiveTypeInfos, /* useExternalBuffer */
        false, columnSortOrderIsDesc);
        BytesWritable bytesWritable = serdeBytes[i];
        binarySortableDeserializeRead.set(bytesWritable.getBytes(), 0, bytesWritable.getLength());
        for (int index = 0; index < columnCount; index++) {
            if (useIncludeColumns && !columnsToInclude[index]) {
                binarySortableDeserializeRead.skipNextField();
            } else if (index >= writeColumnCount) {
                // Should come back a null.
                VerifyFast.verifyDeserializeRead(binarySortableDeserializeRead, primitiveTypeInfos[index], null);
            } else {
                Writable writable = (Writable) row[index];
                VerifyFast.verifyDeserializeRead(binarySortableDeserializeRead, primitiveTypeInfos[index], writable);
            }
        }
        if (writeColumnCount == columnCount) {
            TestCase.assertTrue(binarySortableDeserializeRead.isEndOfInputReached());
        }
    }
}
Also used : BinarySortableDeserializeRead(org.apache.hadoop.hive.serde2.binarysortable.fast.BinarySortableDeserializeRead) ArrayList(java.util.ArrayList) Writable(org.apache.hadoop.io.Writable) BytesWritable(org.apache.hadoop.io.BytesWritable) BytesWritable(org.apache.hadoop.io.BytesWritable) BinarySortableSerializeWrite(org.apache.hadoop.hive.serde2.binarysortable.fast.BinarySortableSerializeWrite) Output(org.apache.hadoop.hive.serde2.ByteStream.Output) EOFException(java.io.EOFException)

Example 9 with SerdeRandomRowSource

use of org.apache.hadoop.hive.serde2.SerdeRandomRowSource in project hive by apache.

the class TestBinarySortableFast method testBinarySortableFastCase.

/**
 * Runs one randomized BinarySortable "fast" serialization case over primitive columns, first
 * with every column ascending and then with every column descending.
 *
 * <p>For each direction {@code testBinarySortableFast} is invoked with and without
 * include-columns; when this case decided to write fewer columns than the row has, the same
 * two variants are repeated with {@code doWriteFewerColumns == true}.
 *
 * @param caseNum case number supplied by the caller (not read by this method)
 * @param doNonRandomFill when true, the random rows are post-processed by
 *        {@code MyTestClass.nonRandomRowFill}
 * @param r random source; used to build the rows, to decide whether fewer columns are
 *        written, and passed on to {@code testBinarySortableFast}
 * @throws Throwable anything raised by the row source, the SerDes or the verification
 */
private void testBinarySortableFastCase(int caseNum, boolean doNonRandomFill, Random r) throws Throwable {
    SerdeRandomRowSource source = new SerdeRandomRowSource();
    source.init(r);
    int rowCount = 1000;
    Object[][] rows = source.randomRows(rowCount);
    if (doNonRandomFill) {
        MyTestClass.nonRandomRowFill(rows, source.primitiveCategories());
    }
    // We need to operate on sorted data to fully test BinarySortable.
    source.sort(rows);
    StructObjectInspector rowStructObjectInspector = source.rowStructObjectInspector();
    PrimitiveTypeInfo[] primitiveTypeInfos = source.primitiveTypeInfos();
    int columnCount = primitiveTypeInfos.length;

    // Randomly decide whether to write only a leading subset of the columns. Drawing the full
    // column count means "not fewer", so the flag is cleared in that case.
    int writeColumnCount = columnCount;
    StructObjectInspector writeRowStructObjectInspector = rowStructObjectInspector;
    boolean doWriteFewerColumns = r.nextBoolean();
    if (doWriteFewerColumns) {
        writeColumnCount = 1 + r.nextInt(columnCount);
        if (writeColumnCount == columnCount) {
            doWriteFewerColumns = false;
        } else {
            writeRowStructObjectInspector = source.partialRowStructObjectInspector(writeColumnCount);
        }
    }

    String fieldNames = ObjectInspectorUtils.getFieldNames(rowStructObjectInspector);
    String fieldTypes = ObjectInspectorUtils.getFieldTypes(rowStructObjectInspector);
    // The partial schema is the same for both sort directions, so derive it once.
    String partialFieldNames = null;
    String partialFieldTypes = null;
    if (doWriteFewerColumns) {
        partialFieldNames = ObjectInspectorUtils.getFieldNames(writeRowStructObjectInspector);
        partialFieldTypes = ObjectInspectorUtils.getFieldTypes(writeRowStructObjectInspector);
    }

    String order = StringUtils.leftPad("", columnCount, '+');
    String nullOrder = StringUtils.leftPad("", columnCount, 'a');
    AbstractSerDe serde_ascending = TestBinarySortableSerDe.getSerDe(fieldNames, fieldTypes, order, nullOrder);
    AbstractSerDe serde_ascending_fewer = null;
    if (doWriteFewerColumns) {
        serde_ascending_fewer = TestBinarySortableSerDe.getSerDe(partialFieldNames, partialFieldTypes, order, nullOrder);
    }

    order = StringUtils.leftPad("", columnCount, '-');
    nullOrder = StringUtils.leftPad("", columnCount, 'z');
    AbstractSerDe serde_descending = TestBinarySortableSerDe.getSerDe(fieldNames, fieldTypes, order, nullOrder);
    AbstractSerDe serde_descending_fewer = null;
    if (doWriteFewerColumns) {
        serde_descending_fewer = TestBinarySortableSerDe.getSerDe(partialFieldNames, partialFieldTypes, order, nullOrder);
    }

    boolean[] columnSortOrderIsDesc = new boolean[columnCount];
    Arrays.fill(columnSortOrderIsDesc, false);
    byte[] columnNullMarker = new byte[columnCount];
    Arrays.fill(columnNullMarker, BinarySortableSerDe.ZERO);
    byte[] columnNotNullMarker = new byte[columnCount];
    Arrays.fill(columnNotNullMarker, BinarySortableSerDe.ONE);

    // Variant order matters because every call consumes values from r:
    // useIncludeColumns = false, then true.
    boolean[] includeColumnsVariants = new boolean[] { false, true };

    /*
     * Ascending.
     */
    for (boolean useIncludeColumns : includeColumnsVariants) {
        testBinarySortableFast(source, rows, columnSortOrderIsDesc, columnNullMarker, columnNotNullMarker,
            serde_ascending, rowStructObjectInspector, serde_ascending_fewer, writeRowStructObjectInspector,
            /* ascending */ true, primitiveTypeInfos, useIncludeColumns, /* doWriteFewerColumns */ false, r);
    }
    if (doWriteFewerColumns) {
        for (boolean useIncludeColumns : includeColumnsVariants) {
            testBinarySortableFast(source, rows, columnSortOrderIsDesc, columnNullMarker, columnNotNullMarker,
                serde_ascending, rowStructObjectInspector, serde_ascending_fewer, writeRowStructObjectInspector,
                /* ascending */ true, primitiveTypeInfos, useIncludeColumns, /* doWriteFewerColumns */ true, r);
        }
    }

    /*
     * Descending.
     */
    Arrays.fill(columnSortOrderIsDesc, true);
    for (boolean useIncludeColumns : includeColumnsVariants) {
        // Pass the descending "fewer" SerDe here too, so the SerDe pair always matches the
        // sort direction (the original handed the ascending one to these two calls).
        testBinarySortableFast(source, rows, columnSortOrderIsDesc, columnNullMarker, columnNotNullMarker,
            serde_descending, rowStructObjectInspector, serde_descending_fewer, writeRowStructObjectInspector,
            /* ascending */ false, primitiveTypeInfos, useIncludeColumns, /* doWriteFewerColumns */ false, r);
    }
    if (doWriteFewerColumns) {
        for (boolean useIncludeColumns : includeColumnsVariants) {
            testBinarySortableFast(source, rows, columnSortOrderIsDesc, columnNullMarker, columnNotNullMarker,
                serde_descending, rowStructObjectInspector, serde_descending_fewer, writeRowStructObjectInspector,
                /* ascending */ false, primitiveTypeInfos, useIncludeColumns, /* doWriteFewerColumns */ true, r);
        }
    }
}
Also used : SerdeRandomRowSource(org.apache.hadoop.hive.serde2.SerdeRandomRowSource) PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo) AbstractSerDe(org.apache.hadoop.hive.serde2.AbstractSerDe) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector)

Example 10 with SerdeRandomRowSource

use of org.apache.hadoop.hive.serde2.SerdeRandomRowSource in project hive by apache.

the class TestLazyBinaryFast method testLazyBinaryFastCase.

/**
 * Runs one randomized LazyBinary "fast" test case.
 *
 * <p>Generates 100 random rows from a {@link SerdeRandomRowSource}, optionally
 * overwrites them via {@code MyTestClass.nonRandomRowFill}, builds a serde for the
 * full row (and, when randomly selected, a second serde for a reduced column
 * count), and then calls {@code testLazyBinaryFast} twice: once with
 * {@code useIncludeColumns} false and once with it true. Both calls pass
 * {@code doWriteFewerColumns} as false.
 *
 * @param caseNum case number supplied by the caller; not read by this method
 * @param doNonRandomFill when true, the generated rows are passed through
 *        {@code MyTestClass.nonRandomRowFill} before use
 * @param r random source shared by row generation, the fewer-columns decision,
 *        and the downstream {@code testLazyBinaryFast} calls
 * @param supportedTypes forwarded to {@link SerdeRandomRowSource#init}
 * @param depth forwarded to {@link SerdeRandomRowSource#init}
 * @throws Throwable whatever the row source, serde setup, or
 *         {@code testLazyBinaryFast} throws
 */
public void testLazyBinaryFastCase(int caseNum, boolean doNonRandomFill, Random r, SerdeRandomRowSource.SupportedTypes supportedTypes, int depth) throws Throwable {
    final int rowCount = 100;

    SerdeRandomRowSource source = new SerdeRandomRowSource();
    source.init(r, supportedTypes, depth);
    Object[][] rows = source.randomRows(rowCount);
    if (doNonRandomFill) {
        MyTestClass.nonRandomRowFill(rows, source.primitiveCategories());
    }

    StructObjectInspector fullRowInspector = source.rowStructObjectInspector();
    TypeInfo[] typeInfos = source.typeInfos();
    int columnCount = typeInfos.length;

    // Randomly decide whether to prepare a reduced-column writer. Picking the
    // full column count counts as "not fewer", so the flag is only set when the
    // chosen count is strictly smaller.
    StructObjectInspector writeRowInspector = fullRowInspector;
    boolean doWriteFewerColumns = false;
    if (r.nextBoolean()) {
        int writeColumnCount = 1 + r.nextInt(columnCount);
        if (writeColumnCount != columnCount) {
            doWriteFewerColumns = true;
            writeRowInspector = source.partialRowStructObjectInspector(writeColumnCount);
        }
    }

    String fieldNames = ObjectInspectorUtils.getFieldNames(fullRowInspector);
    String fieldTypes = ObjectInspectorUtils.getFieldTypes(fullRowInspector);
    TestLazyBinarySerDe serDeFactory = new TestLazyBinarySerDe();
    AbstractSerDe serde = serDeFactory.getSerDe(fieldNames, fieldTypes);

    // The reduced-column serde stays null unless fewer columns were selected.
    AbstractSerDe serdeFewer = doWriteFewerColumns
        ? serDeFactory.getSerDe(
            ObjectInspectorUtils.getFieldNames(writeRowInspector),
            ObjectInspectorUtils.getFieldTypes(writeRowInspector))
        : null;

    // Exercise the full-width write path without, then with, include-columns.
    for (boolean useIncludeColumns : new boolean[] { false, true }) {
        testLazyBinaryFast(source, rows, serde, fullRowInspector, serdeFewer, writeRowInspector, typeInfos,
            useIncludeColumns, /* doWriteFewerColumns */ false, r);
    }

    /*
     * Can the LazyBinary format really tolerate writing fewer columns?
     *
     * Until that is answered, the doWriteFewerColumns == true variants of the
     * two calls above remain disabled.
     */
}
Also used : SerdeRandomRowSource(org.apache.hadoop.hive.serde2.SerdeRandomRowSource) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) AbstractSerDe(org.apache.hadoop.hive.serde2.AbstractSerDe) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector)

Aggregations

Output (org.apache.hadoop.hive.serde2.ByteStream.Output)6 SerdeRandomRowSource (org.apache.hadoop.hive.serde2.SerdeRandomRowSource)6 StructObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector)6 BytesWritable (org.apache.hadoop.io.BytesWritable)6 PrimitiveTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo)5 TypeInfo (org.apache.hadoop.hive.serde2.typeinfo.TypeInfo)5 AbstractSerDe (org.apache.hadoop.hive.serde2.AbstractSerDe)4 UnionObject (org.apache.hadoop.hive.serde2.objectinspector.UnionObject)3 Writable (org.apache.hadoop.io.Writable)3 EOFException (java.io.EOFException)2 ArrayList (java.util.ArrayList)2 BinarySortableDeserializeRead (org.apache.hadoop.hive.serde2.binarysortable.fast.BinarySortableDeserializeRead)2 BinarySortableSerializeWrite (org.apache.hadoop.hive.serde2.binarysortable.fast.BinarySortableSerializeWrite)2 LazySimpleDeserializeRead (org.apache.hadoop.hive.serde2.lazy.fast.LazySimpleDeserializeRead)2 LazySimpleSerializeWrite (org.apache.hadoop.hive.serde2.lazy.fast.LazySimpleSerializeWrite)2 LazyBinaryDeserializeRead (org.apache.hadoop.hive.serde2.lazybinary.fast.LazyBinaryDeserializeRead)2 LazyBinarySerializeWrite (org.apache.hadoop.hive.serde2.lazybinary.fast.LazyBinarySerializeWrite)2 StructTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo)2 Text (org.apache.hadoop.io.Text)2