Use of org.apache.hadoop.hive.serde2.AbstractSerDe in project hive by apache.
From class TestInputOutputFormat, method testMROutput.
@Test
public void testMROutput() throws Exception {
  Properties properties = new Properties();
  StructObjectInspector inspector;
  synchronized (TestOrcFile.class) {
    inspector = (StructObjectInspector) ObjectInspectorFactory.getReflectionObjectInspector(
        NestedRow.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
  }
  AbstractSerDe serde = new OrcSerde();
  OutputFormat<?, ?> outFormat = new OrcOutputFormat();
  RecordWriter writer = outFormat.getRecordWriter(fs, conf, testFilePath.toString(), Reporter.NULL);
  writer.write(NullWritable.get(), serde.serialize(new NestedRow(1, 2, 3), inspector));
  writer.write(NullWritable.get(), serde.serialize(new NestedRow(4, 5, 6), inspector));
  writer.write(NullWritable.get(), serde.serialize(new NestedRow(7, 8, 9), inspector));
  writer.close(Reporter.NULL);
  serde = new OrcSerde();
  properties.setProperty("columns", "z,r");
  properties.setProperty("columns.types", "int:struct<x:int,y:int>");
  SerDeUtils.initializeSerDe(serde, conf, properties, null);
  inspector = (StructObjectInspector) serde.getObjectInspector();
  InputFormat<?, ?> in = new OrcInputFormat();
  FileInputFormat.setInputPaths(conf, testFilePath.toString());
  InputSplit[] splits = in.getSplits(conf, 1);
  assertEquals(1, splits.length);
  ColumnProjectionUtils.appendReadColumns(conf, Collections.singletonList(1));
  conf.set("columns", "z,r");
  conf.set("columns.types", "int:struct<x:int,y:int>");
  org.apache.hadoop.mapred.RecordReader reader = in.getRecordReader(splits[0], conf, Reporter.NULL);
  Object key = reader.createKey();
  Object value = reader.createValue();
  int rowNum = 0;
  List<? extends StructField> fields = inspector.getAllStructFieldRefs();
  StructObjectInspector inner = (StructObjectInspector) fields.get(1).getFieldObjectInspector();
  List<? extends StructField> inFields = inner.getAllStructFieldRefs();
  IntObjectInspector intInspector = (IntObjectInspector) fields.get(0).getFieldObjectInspector();
  while (reader.next(key, value)) {
    assertEquals(null, inspector.getStructFieldData(value, fields.get(0)));
    Object sub = inspector.getStructFieldData(value, fields.get(1));
    assertEquals(3 * rowNum + 1, intInspector.get(inner.getStructFieldData(sub, inFields.get(0))));
    assertEquals(3 * rowNum + 2, intInspector.get(inner.getStructFieldData(sub, inFields.get(1))));
    rowNum += 1;
  }
  assertEquals(3, rowNum);
  reader.close();
}
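For context on the projection step above: ColumnProjectionUtils records the requested column ids in ordinary configuration properties that the ORC reader later consults. A minimal sketch, assuming the property key is hive.io.file.readcolumn.ids (the key ColumnProjectionUtils manages in current Hive; treat the exact name as an assumption):

import java.util.Collections;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.serde2.ColumnProjectionUtils;

public class ProjectionSketch {
  public static void main(String[] args) {
    Configuration conf = new Configuration();
    // Ask readers to materialize only column 1 (the struct column "r" above).
    ColumnProjectionUtils.appendReadColumns(conf, Collections.singletonList(1));
    // The ids land in a plain conf property; the key name is an assumption.
    System.out.println(conf.get("hive.io.file.readcolumn.ids"));
  }
}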
Use of org.apache.hadoop.hive.serde2.AbstractSerDe in project hive by apache.
From class TestLazyBinaryFast, method testLazyBinaryFastCase.
public void testLazyBinaryFastCase(int caseNum, boolean doNonRandomFill, Random r) throws Throwable {
  SerdeRandomRowSource source = new SerdeRandomRowSource();
  source.init(r);
  int rowCount = 1000;
  Object[][] rows = source.randomRows(rowCount);
  if (doNonRandomFill) {
    MyTestClass.nonRandomRowFill(rows, source.primitiveCategories());
  }
  StructObjectInspector rowStructObjectInspector = source.rowStructObjectInspector();
  PrimitiveTypeInfo[] primitiveTypeInfos = source.primitiveTypeInfos();
  int columnCount = primitiveTypeInfos.length;
  int writeColumnCount = columnCount;
  StructObjectInspector writeRowStructObjectInspector = rowStructObjectInspector;
  boolean doWriteFewerColumns = r.nextBoolean();
  if (doWriteFewerColumns) {
    writeColumnCount = 1 + r.nextInt(columnCount);
    if (writeColumnCount == columnCount) {
      doWriteFewerColumns = false;
    } else {
      writeRowStructObjectInspector = source.partialRowStructObjectInspector(writeColumnCount);
    }
  }
  String fieldNames = ObjectInspectorUtils.getFieldNames(rowStructObjectInspector);
  String fieldTypes = ObjectInspectorUtils.getFieldTypes(rowStructObjectInspector);
  AbstractSerDe serde = TestLazyBinarySerDe.getSerDe(fieldNames, fieldTypes);
  AbstractSerDe serde_fewer = null;
  if (doWriteFewerColumns) {
    String partialFieldNames = ObjectInspectorUtils.getFieldNames(writeRowStructObjectInspector);
    String partialFieldTypes = ObjectInspectorUtils.getFieldTypes(writeRowStructObjectInspector);
    serde_fewer = TestLazyBinarySerDe.getSerDe(partialFieldNames, partialFieldTypes);
  }
  testLazyBinaryFast(source, rows, serde, rowStructObjectInspector, serde_fewer, writeRowStructObjectInspector,
      primitiveTypeInfos, /* useIncludeColumns */ false, /* doWriteFewerColumns */ false, r);
  testLazyBinaryFast(source, rows, serde, rowStructObjectInspector, serde_fewer, writeRowStructObjectInspector,
      primitiveTypeInfos, /* useIncludeColumns */ true, /* doWriteFewerColumns */ false, r);
  /*
   * Can the LazyBinary format really tolerate writing fewer columns?
   */
  // if (doWriteFewerColumns) {
  //   testLazyBinaryFast(
  //       source, rows,
  //       serde, rowStructObjectInspector,
  //       serde_fewer, writeRowStructObjectInspector,
  //       primitiveTypeInfos,
  //       /* useIncludeColumns */ false, /* doWriteFewerColumns */ true, r);
  //   testLazyBinaryFast(
  //       source, rows,
  //       serde, rowStructObjectInspector,
  //       serde_fewer, writeRowStructObjectInspector,
  //       primitiveTypeInfos,
  //       /* useIncludeColumns */ true, /* doWriteFewerColumns */ true, r);
  // }
}
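Randomized case methods like this are usually driven by a small loop that fixes the random seed so failures are reproducible. A hedged sketch of such a driver (the method name, seed, and case count are illustrative, not taken from the source):

public void runAllLazyBinaryFastCases() throws Throwable {
  // Hypothetical driver: a fixed seed makes the random rows reproducible.
  Random r = new Random(35790);
  int caseCount = 10;  // assumed; pick whatever coverage you need
  for (int caseNum = 0; caseNum < caseCount; caseNum++) {
    // Alternate the non-random fill so boundary values are also covered.
    testLazyBinaryFastCase(caseNum, /* doNonRandomFill */ (caseNum % 2 == 0), r);
  }
}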
Use of org.apache.hadoop.hive.serde2.AbstractSerDe in project hive by apache.
From class TestLazyBinaryFast, method testLazyBinaryFast.
private void testLazyBinaryFast(SerdeRandomRowSource source, Object[][] rows, AbstractSerDe serde,
    StructObjectInspector rowOI, AbstractSerDe serde_fewer, StructObjectInspector writeRowOI,
    PrimitiveTypeInfo[] primitiveTypeInfos, boolean useIncludeColumns, boolean doWriteFewerColumns,
    Random r) throws Throwable {
  int rowCount = rows.length;
  int columnCount = primitiveTypeInfos.length;
  boolean[] columnsToInclude = null;
  if (useIncludeColumns) {
    columnsToInclude = new boolean[columnCount];
    for (int i = 0; i < columnCount; i++) {
      columnsToInclude[i] = r.nextBoolean();
    }
  }
  int writeColumnCount = columnCount;
  PrimitiveTypeInfo[] writePrimitiveTypeInfos = primitiveTypeInfos;
  if (doWriteFewerColumns) {
    writeColumnCount = writeRowOI.getAllStructFieldRefs().size();
    writePrimitiveTypeInfos = Arrays.copyOf(primitiveTypeInfos, writeColumnCount);
  }
  LazyBinarySerializeWrite lazyBinarySerializeWrite = new LazyBinarySerializeWrite(writeColumnCount);
  // Try to serialize.
  BytesWritable[] serializeWriteBytes = new BytesWritable[rowCount];
  for (int i = 0; i < rowCount; i++) {
    Object[] row = rows[i];
    Output output = new Output();
    lazyBinarySerializeWrite.set(output);
    for (int index = 0; index < writeColumnCount; index++) {
      Writable writable = (Writable) row[index];
      VerifyFast.serializeWrite(lazyBinarySerializeWrite, primitiveTypeInfos[index], writable);
    }
    BytesWritable bytesWritable = new BytesWritable();
    bytesWritable.set(output.getData(), 0, output.getLength());
    serializeWriteBytes[i] = bytesWritable;
  }
  // Try to deserialize.
  for (int i = 0; i < rowCount; i++) {
    Object[] row = rows[i];
    // Specifying the right type info length tells LazyBinaryDeserializeRead which is the last column.
    LazyBinaryDeserializeRead lazyBinaryDeserializeRead =
        new LazyBinaryDeserializeRead(writePrimitiveTypeInfos, /* useExternalBuffer */ false);
    BytesWritable bytesWritable = serializeWriteBytes[i];
    lazyBinaryDeserializeRead.set(bytesWritable.getBytes(), 0, bytesWritable.getLength());
    for (int index = 0; index < columnCount; index++) {
      if (useIncludeColumns && !columnsToInclude[index]) {
        lazyBinaryDeserializeRead.skipNextField();
      } else if (index >= writeColumnCount) {
        // Should come back as a null.
        VerifyFast.verifyDeserializeRead(lazyBinaryDeserializeRead, primitiveTypeInfos[index], null);
      } else {
        Writable writable = (Writable) row[index];
        VerifyFast.verifyDeserializeRead(lazyBinaryDeserializeRead, primitiveTypeInfos[index], writable);
      }
    }
    if (writeColumnCount == columnCount) {
      TestCase.assertTrue(lazyBinaryDeserializeRead.isEndOfInputReached());
    }
  }
  // Try to deserialize, using the SerDe class, the Writable row objects created by SerializeWrite.
  for (int i = 0; i < rowCount; i++) {
    BytesWritable bytesWritable = serializeWriteBytes[i];
    LazyBinaryStruct lazyBinaryStruct;
    if (doWriteFewerColumns) {
      lazyBinaryStruct = (LazyBinaryStruct) serde_fewer.deserialize(bytesWritable);
    } else {
      lazyBinaryStruct = (LazyBinaryStruct) serde.deserialize(bytesWritable);
    }
    Object[] row = rows[i];
    for (int index = 0; index < writeColumnCount; index++) {
      PrimitiveTypeInfo primitiveTypeInfo = primitiveTypeInfos[index];
      Writable writable = (Writable) row[index];
      Object object = lazyBinaryStruct.getField(index);
      if (writable == null || object == null) {
        if (writable != null || object != null) {
          fail("SerDe deserialized NULL column mismatch");
        }
      } else {
        if (!object.equals(writable)) {
          fail("SerDe deserialized value does not match");
        }
      }
    }
  }
  // One Writable per row.
  BytesWritable[] serdeBytes = new BytesWritable[rowCount];
  // Serialize using the SerDe, then below deserialize using DeserializeRead.
  Object[] serdeRow = new Object[writeColumnCount];
  for (int i = 0; i < rowCount; i++) {
    Object[] row = rows[i];
    // LazyBinary seems to work better with a row object array than with a Java object.
    for (int index = 0; index < writeColumnCount; index++) {
      serdeRow[index] = row[index];
    }
    BytesWritable serialized;
    if (doWriteFewerColumns) {
      serialized = (BytesWritable) serde_fewer.serialize(serdeRow, writeRowOI);
    } else {
      serialized = (BytesWritable) serde.serialize(serdeRow, rowOI);
    }
    BytesWritable bytesWritable =
        new BytesWritable(Arrays.copyOfRange(serialized.getBytes(), 0, serialized.getLength()));
    byte[] bytes1 = bytesWritable.getBytes();
    BytesWritable lazySerializedWriteBytes = serializeWriteBytes[i];
    byte[] bytes2 = Arrays.copyOfRange(lazySerializedWriteBytes.getBytes(), 0, lazySerializedWriteBytes.getLength());
    if (bytes1.length != bytes2.length) {
      fail("SerializeWrite length " + bytes2.length + " and SerDe serialization length " + bytes1.length
          + " do not match (" + Arrays.toString(primitiveTypeInfos) + ")");
    }
    if (!Arrays.equals(bytes1, bytes2)) {
      fail("SerializeWrite and SerDe serialization do not match (" + Arrays.toString(primitiveTypeInfos) + ")");
    }
    serdeBytes[i] = bytesWritable;
  }
  // Try to deserialize, using DeserializeRead, the Writable row objects created by the SerDe.
  for (int i = 0; i < rowCount; i++) {
    Object[] row = rows[i];
    // When doWriteFewerColumns, try to read more fields than exist in the buffer.
    LazyBinaryDeserializeRead lazyBinaryDeserializeRead =
        new LazyBinaryDeserializeRead(primitiveTypeInfos, /* useExternalBuffer */ false);
    BytesWritable bytesWritable = serdeBytes[i];
    lazyBinaryDeserializeRead.set(bytesWritable.getBytes(), 0, bytesWritable.getLength());
    for (int index = 0; index < columnCount; index++) {
      if (useIncludeColumns && !columnsToInclude[index]) {
        lazyBinaryDeserializeRead.skipNextField();
      } else if (index >= writeColumnCount) {
        // Should come back as a null.
        VerifyFast.verifyDeserializeRead(lazyBinaryDeserializeRead, primitiveTypeInfos[index], null);
      } else {
        Writable writable = (Writable) row[index];
        VerifyFast.verifyDeserializeRead(lazyBinaryDeserializeRead, primitiveTypeInfos[index], writable);
      }
    }
    if (writeColumnCount == columnCount) {
      TestCase.assertTrue(lazyBinaryDeserializeRead.isEndOfInputReached());
    }
  }
}
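One detail worth calling out from the method above: BytesWritable.getBytes() returns the whole backing array, which can be longer than the valid region reported by getLength(), so byte-level comparisons must first truncate with Arrays.copyOfRange, as the code does. A minimal standalone sketch of the pitfall, assuming only Hadoop's BytesWritable:

import java.util.Arrays;
import org.apache.hadoop.io.BytesWritable;

public class BackingArraySketch {
  public static void main(String[] args) {
    BytesWritable bw = new BytesWritable();
    bw.set(new byte[] { 1, 2, 3 }, 0, 3);  // capacity grows to hold 3 bytes
    bw.set(new byte[] { 9 }, 0, 1);        // logical size shrinks; capacity does not
    byte[] raw = bw.getBytes();            // may still be longer than 1 byte
    byte[] valid = Arrays.copyOfRange(raw, 0, bw.getLength());  // exactly { 9 }
    System.out.println(raw.length + " raw bytes vs " + valid.length + " valid byte");
  }
}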
Use of org.apache.hadoop.hive.serde2.AbstractSerDe in project hive by apache.
From class SQLOperation, method getSerDe.
private AbstractSerDe getSerDe() throws SQLException {
  if (serde != null) {
    return serde;
  }
  try {
    List<FieldSchema> fieldSchemas = mResultSchema.getFieldSchemas();
    StringBuilder namesSb = new StringBuilder();
    StringBuilder typesSb = new StringBuilder();
    if (fieldSchemas != null && !fieldSchemas.isEmpty()) {
      for (int pos = 0; pos < fieldSchemas.size(); pos++) {
        if (pos != 0) {
          namesSb.append(",");
          typesSb.append(",");
        }
        namesSb.append(fieldSchemas.get(pos).getName());
        typesSb.append(fieldSchemas.get(pos).getType());
      }
    }
    String names = namesSb.toString();
    String types = typesSb.toString();
    serde = new LazySimpleSerDe();
    Properties props = new Properties();
    if (names.length() > 0) {
      LOG.debug("Column names: " + names);
      props.setProperty(serdeConstants.LIST_COLUMNS, names);
    }
    if (types.length() > 0) {
      LOG.debug("Column types: " + types);
      props.setProperty(serdeConstants.LIST_COLUMN_TYPES, types);
    }
    SerDeUtils.initializeSerDe(serde, queryState.getConf(), props, null);
  } catch (Exception ex) {
    ex.printStackTrace();
    throw new SQLException("Could not create ResultSet: " + ex.getMessage(), ex);
  }
  return serde;
}
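For a result schema of, say, (id int, name string), the loop above produces properties equivalent to the following. A minimal sketch (serdeConstants.LIST_COLUMNS and LIST_COLUMN_TYPES resolve to "columns" and "columns.types"; the column names and types here are invented for illustration):

import java.util.Properties;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.serde.serdeConstants;
import org.apache.hadoop.hive.serde2.AbstractSerDe;
import org.apache.hadoop.hive.serde2.SerDeUtils;
import org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe;

public class ResultSetSerDeSketch {
  public static AbstractSerDe build() throws Exception {
    Properties props = new Properties();
    props.setProperty(serdeConstants.LIST_COLUMNS, "id,name");         // "columns"
    props.setProperty(serdeConstants.LIST_COLUMN_TYPES, "int,string"); // "columns.types"
    AbstractSerDe serde = new LazySimpleSerDe();
    // Same initialization call the method above uses.
    SerDeUtils.initializeSerDe(serde, new HiveConf(), props, null);
    return serde;
  }
}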
Use of org.apache.hadoop.hive.serde2.AbstractSerDe in project hive by apache.
From class TestBinarySortableFast, method testBinarySortableFast.
private void testBinarySortableFast(SerdeRandomRowSource source, Object[][] rows,
    boolean[] columnSortOrderIsDesc, byte[] columnNullMarker, byte[] columnNotNullMarker,
    AbstractSerDe serde, StructObjectInspector rowOI, AbstractSerDe serde_fewer,
    StructObjectInspector writeRowOI, boolean ascending, TypeInfo[] typeInfos,
    boolean useIncludeColumns, boolean doWriteFewerColumns, Random r) throws Throwable {
  int rowCount = rows.length;
  int columnCount = typeInfos.length;
  boolean[] columnsToInclude = null;
  if (useIncludeColumns) {
    columnsToInclude = new boolean[columnCount];
    for (int i = 0; i < columnCount; i++) {
      columnsToInclude[i] = r.nextBoolean();
    }
  }
  int writeColumnCount = columnCount;
  if (doWriteFewerColumns) {
    writeColumnCount = writeRowOI.getAllStructFieldRefs().size();
  }
  BinarySortableSerializeWrite binarySortableSerializeWrite =
      new BinarySortableSerializeWrite(columnSortOrderIsDesc, columnNullMarker, columnNotNullMarker);
  // Try to serialize.
  // One Writable per row.
  BytesWritable[] serializeWriteBytes = new BytesWritable[rowCount];
  int[][] perFieldWriteLengthsArray = new int[rowCount][];
  for (int i = 0; i < rowCount; i++) {
    Object[] row = rows[i];
    Output output = new Output();
    binarySortableSerializeWrite.set(output);
    int[] perFieldWriteLengths = new int[columnCount];
    for (int index = 0; index < writeColumnCount; index++) {
      VerifyFast.serializeWrite(binarySortableSerializeWrite, typeInfos[index], row[index]);
      perFieldWriteLengths[index] = output.getLength();
    }
    perFieldWriteLengthsArray[i] = perFieldWriteLengths;
    BytesWritable bytesWritable = new BytesWritable();
    bytesWritable.set(output.getData(), 0, output.getLength());
    serializeWriteBytes[i] = bytesWritable;
    if (i > 0) {
      BytesWritable previousBytesWritable = serializeWriteBytes[i - 1];
      int compareResult = previousBytesWritable.compareTo(bytesWritable);
      if ((compareResult < 0 && !ascending) || (compareResult > 0 && ascending)) {
        System.out.println("Test failed in " + (ascending ? "ascending" : "descending")
            + " order with " + (i - 1) + " and " + i);
        System.out.println("serialized data [" + (i - 1) + "] = "
            + TestBinarySortableSerDe.hexString(serializeWriteBytes[i - 1]));
        System.out.println("serialized data [" + i + "] = "
            + TestBinarySortableSerDe.hexString(serializeWriteBytes[i]));
        fail("Sort order of serialized " + (i - 1) + " and " + i + " are reversed!");
      }
    }
  }
  // Try to deserialize, using DeserializeRead, the Writable row objects created by SerializeWrite.
  for (int i = 0; i < rowCount; i++) {
    Object[] row = rows[i];
    BinarySortableDeserializeRead binarySortableDeserializeRead =
        new BinarySortableDeserializeRead(typeInfos, /* useExternalBuffer */ false,
            columnSortOrderIsDesc, columnNullMarker, columnNotNullMarker);
    BytesWritable bytesWritable = serializeWriteBytes[i];
    binarySortableDeserializeRead.set(bytesWritable.getBytes(), 0, bytesWritable.getLength());
    for (int index = 0; index < columnCount; index++) {
      if (useIncludeColumns && !columnsToInclude[index]) {
        binarySortableDeserializeRead.skipNextField();
      } else if (index >= writeColumnCount) {
        // Should come back as a null.
        VerifyFast.verifyDeserializeRead(binarySortableDeserializeRead, typeInfos[index], null);
      } else {
        verifyRead(binarySortableDeserializeRead, typeInfos[index], row[index]);
      }
    }
    if (writeColumnCount == columnCount) {
      TestCase.assertTrue(binarySortableDeserializeRead.isEndOfInputReached());
    }
    /*
     * Clip off one byte and expect to get an EOFException on the last written field.
     */
    BinarySortableDeserializeRead binarySortableDeserializeRead2 =
        new BinarySortableDeserializeRead(typeInfos, /* useExternalBuffer */ false,
            columnSortOrderIsDesc, columnNullMarker, columnNotNullMarker);
    // One fewer byte.
    binarySortableDeserializeRead2.set(bytesWritable.getBytes(), 0, bytesWritable.getLength() - 1);
    for (int index = 0; index < writeColumnCount; index++) {
      if (index == writeColumnCount - 1) {
        boolean threw = false;
        try {
          verifyRead(binarySortableDeserializeRead2, typeInfos[index], row[index]);
        } catch (EOFException e) {
          // debugDetailedReadPositionString = binarySortableDeserializeRead2.getDetailedReadPositionString();
          // debugStackTrace = e.getStackTrace();
          threw = true;
        }
        if (!threw && row[index] != null) {
          Assert.fail();
        }
      } else {
        if (useIncludeColumns && !columnsToInclude[index]) {
          binarySortableDeserializeRead2.skipNextField();
        } else {
          verifyRead(binarySortableDeserializeRead2, typeInfos[index], row[index]);
        }
      }
    }
  }
  // Try to deserialize, using the SerDe class, the Writable row objects created by SerializeWrite.
  for (int i = 0; i < rowCount; i++) {
    BytesWritable bytesWritable = serializeWriteBytes[i];
    // Note that the regular SerDe doesn't tolerate fewer columns.
    List<Object> deserializedRow;
    if (doWriteFewerColumns) {
      deserializedRow = (List<Object>) serde_fewer.deserialize(bytesWritable);
    } else {
      deserializedRow = (List<Object>) serde.deserialize(bytesWritable);
    }
    Object[] row = rows[i];
    for (int index = 0; index < writeColumnCount; index++) {
      Object expected = row[index];
      Object object = deserializedRow.get(index);
      if (expected == null || object == null) {
        if (expected != null || object != null) {
          fail("SerDe deserialized NULL column mismatch");
        }
      } else {
        if (!object.equals(expected)) {
          fail("SerDe deserialized value does not match (expected " + expected.getClass().getName()
              + " " + expected.toString() + ", actual " + object.getClass().getName()
              + " " + object.toString() + ")");
        }
      }
    }
  }
  // One Writable per row.
  BytesWritable[] serdeBytes = new BytesWritable[rowCount];
  // Serialize using the SerDe, then below deserialize using DeserializeRead.
  for (int i = 0; i < rowCount; i++) {
    Object[] row = rows[i];
    // Since the SerDe reuses memory, we will need to make a copy.
    BytesWritable serialized;
    if (doWriteFewerColumns) {
      serialized = (BytesWritable) serde_fewer.serialize(row, rowOI);
    } else {
      serialized = (BytesWritable) serde.serialize(row, rowOI);
    }
    BytesWritable bytesWritable = new BytesWritable();
    bytesWritable.set(serialized);
    byte[] serDeOutput = Arrays.copyOfRange(bytesWritable.getBytes(), 0, bytesWritable.getLength());
    byte[] serializeWriteExpected =
        Arrays.copyOfRange(serializeWriteBytes[i].getBytes(), 0, serializeWriteBytes[i].getLength());
    if (!Arrays.equals(serDeOutput, serializeWriteExpected)) {
      int mismatchPos = -1;
      if (serDeOutput.length != serializeWriteExpected.length) {
        for (int b = 0; b < Math.min(serDeOutput.length, serializeWriteExpected.length); b++) {
          if (serDeOutput[b] != serializeWriteExpected[b]) {
            mismatchPos = b;
            break;
          }
        }
        fail("Different byte array lengths: serDeOutput.length " + serDeOutput.length
            + ", serializeWriteExpected.length " + serializeWriteExpected.length
            + " mismatchPos " + mismatchPos
            + " perFieldWriteLengths " + Arrays.toString(perFieldWriteLengthsArray[i]));
      }
      List<Integer> differentPositions = new ArrayList<Integer>();
      for (int b = 0; b < serDeOutput.length; b++) {
        if (serDeOutput[b] != serializeWriteExpected[b]) {
          differentPositions.add(b);
        }
      }
      if (differentPositions.size() > 0) {
        List<String> serializeWriteExpectedFields = new ArrayList<String>();
        List<String> serDeFields = new ArrayList<String>();
        int f = 0;
        int lastBegin = 0;
        for (int b = 0; b < serDeOutput.length; b++) {
          int writeLength = perFieldWriteLengthsArray[i][f];
          if (b + 1 == writeLength) {
            serializeWriteExpectedFields.add(
                displayBytes(serializeWriteExpected, lastBegin, writeLength - lastBegin));
            serDeFields.add(displayBytes(serDeOutput, lastBegin, writeLength - lastBegin));
            f++;
            lastBegin = b + 1;
          }
        }
        fail("SerializeWrite and SerDe serialization do not match at positions " + differentPositions.toString()
            + "\n(SerializeWrite: " + serializeWriteExpectedFields.toString()
            + "\nSerDe: " + serDeFields.toString()
            + "\nperFieldWriteLengths " + Arrays.toString(perFieldWriteLengthsArray[i])
            + "\nprimitiveTypeInfos " + Arrays.toString(typeInfos)
            + "\nrow " + Arrays.toString(row) + ")");
      }
    }
    serdeBytes[i] = bytesWritable;
  }
  // Try to deserialize, using DeserializeRead, the Writable row objects created by the SerDe.
  for (int i = 0; i < rowCount; i++) {
    Object[] row = rows[i];
    BinarySortableDeserializeRead binarySortableDeserializeRead =
        new BinarySortableDeserializeRead(typeInfos, /* useExternalBuffer */ false,
            columnSortOrderIsDesc, columnNullMarker, columnNotNullMarker);
    BytesWritable bytesWritable = serdeBytes[i];
    binarySortableDeserializeRead.set(bytesWritable.getBytes(), 0, bytesWritable.getLength());
    for (int index = 0; index < columnCount; index++) {
      if (useIncludeColumns && !columnsToInclude[index]) {
        binarySortableDeserializeRead.skipNextField();
      } else if (index >= writeColumnCount) {
        // Should come back as a null.
        verifyRead(binarySortableDeserializeRead, typeInfos[index], null);
      } else {
        verifyRead(binarySortableDeserializeRead, typeInfos[index], row[index]);
      }
    }
    if (writeColumnCount == columnCount) {
      TestCase.assertTrue(binarySortableDeserializeRead.isEndOfInputReached());
    }
  }
}
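The first loop above verifies BinarySortable's defining property: unsigned byte order of the serialized form matches the logical order of the values. A hedged single-column sketch of that invariant (it assumes the fast-path SerializeWrite interface exposes writeInt, and that 0/1 are usable null/not-null marker bytes; both are assumptions, not taken from the source):

public void sketchOrderPreservation() throws Exception {
  boolean[] columnSortOrderIsDesc = new boolean[] { false };  // one ascending column
  byte[] columnNullMarker = new byte[] { 0 };                 // assumed marker bytes
  byte[] columnNotNullMarker = new byte[] { 1 };
  BinarySortableSerializeWrite sw =
      new BinarySortableSerializeWrite(columnSortOrderIsDesc, columnNullMarker, columnNotNullMarker);
  Output out1 = new Output();
  sw.set(out1);
  sw.writeInt(5);
  BytesWritable a = new BytesWritable();
  a.set(out1.getData(), 0, out1.getLength());
  Output out2 = new Output();
  sw.set(out2);
  sw.writeInt(42);
  BytesWritable b = new BytesWritable();
  b.set(out2.getData(), 0, out2.getLength());
  // BytesWritable.compareTo is an unsigned lexicographic byte comparison, so
  // this assertion holds exactly when the serialized form is order-preserving.
  TestCase.assertTrue(a.compareTo(b) < 0);
}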