Search in sources:

Example 91 with BytesWritable

use of org.apache.hadoop.io.BytesWritable in project hive by apache.

From the class TestDynamicSerDe, method testNulls2.

/**
 * Tests all elements of a struct being null with return nulls on.
 */
public void testNulls2() throws Throwable {
    try {
        // Build a three-field struct where every field is null.
        ArrayList<String> nullList = null;
        HashMap<String, Integer> nullMap = null;
        ArrayList<Object> struct = new ArrayList<Object>();
        struct.add(null);
        struct.add(nullList);
        struct.add(nullMap);
        // Schema for the struct, using the CTL-separated protocol with
        // null returns enabled.
        Properties schema = new Properties();
        schema.setProperty(serdeConstants.SERIALIZATION_FORMAT, org.apache.hadoop.hive.serde2.thrift.TCTLSeparatedProtocol.class.getName());
        schema.setProperty(org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.META_TABLE_NAME, "test");
        schema.setProperty(serdeConstants.SERIALIZATION_DDL, "struct test { i32 hello, list<string> bye, map<string,i32> another}");
        schema.setProperty(serdeConstants.SERIALIZATION_LIB, new DynamicSerDe().getClass().toString());
        schema.setProperty(TCTLSeparatedProtocol.ReturnNullsKey, "true");
        DynamicSerDe serde = new DynamicSerDe();
        serde.initialize(new Configuration(), schema);
        ObjectInspector inspector = serde.getObjectInspector();
        // Round-trip: serialize, then deserialize.
        BytesWritable serialized = (BytesWritable) serde.serialize(struct, inspector);
        hexString(serialized);
        List<?> result = (List<?>) serde.deserialize(serialized);
        assertEquals(3, result.size());
        for (int i = 0; i < 3; i++) {
            assertEquals(null, result.get(i));
        }
    // assertEquals(o, struct); Cannot do this because types of null lists are
    // wrong.
    } catch (Throwable e) {
        e.printStackTrace();
        throw e;
    }
}
Also used : ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) Configuration(org.apache.hadoop.conf.Configuration) ArrayList(java.util.ArrayList) BytesWritable(org.apache.hadoop.io.BytesWritable) Properties(java.util.Properties) ArrayList(java.util.ArrayList) List(java.util.List) TCTLSeparatedProtocol(org.apache.hadoop.hive.serde2.thrift.TCTLSeparatedProtocol)

Example 92 with BytesWritable

use of org.apache.hadoop.io.BytesWritable in project hive by apache.

From the class TestDynamicSerDe, method testNulls1.

/**
 * Tests a single null list within a struct with return nulls on.
 */
public void testNulls1() throws Throwable {
    try {
        // Struct contents: a valid i32, a null list, and a populated map.
        ArrayList<String> nullList = null;
        HashMap<String, Integer> map = new HashMap<String, Integer>();
        map.put("firstKey", 1);
        map.put("secondKey", 2);
        ArrayList<Object> struct = new ArrayList<Object>();
        struct.add(Integer.valueOf(234));
        struct.add(nullList);
        struct.add(map);
        // Schema for the struct, using the CTL-separated protocol with
        // null returns enabled.
        Properties schema = new Properties();
        schema.setProperty(serdeConstants.SERIALIZATION_FORMAT, org.apache.hadoop.hive.serde2.thrift.TCTLSeparatedProtocol.class.getName());
        schema.setProperty(org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.META_TABLE_NAME, "test");
        schema.setProperty(serdeConstants.SERIALIZATION_DDL, "struct test { i32 hello, list<string> bye, map<string,i32> another}");
        schema.setProperty(serdeConstants.SERIALIZATION_LIB, new DynamicSerDe().getClass().toString());
        schema.setProperty(TCTLSeparatedProtocol.ReturnNullsKey, "true");
        DynamicSerDe serde = new DynamicSerDe();
        serde.initialize(new Configuration(), schema);
        ObjectInspector inspector = serde.getObjectInspector();
        // Serialize, then verify deserialization reproduces the struct.
        BytesWritable serialized = (BytesWritable) serde.serialize(struct, inspector);
        hexString(serialized);
        assertEquals(struct, serde.deserialize(serialized));
    } catch (Throwable e) {
        e.printStackTrace();
        throw e;
    }
}
Also used : ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) Configuration(org.apache.hadoop.conf.Configuration) HashMap(java.util.HashMap) LinkedHashMap(java.util.LinkedHashMap) ArrayList(java.util.ArrayList) BytesWritable(org.apache.hadoop.io.BytesWritable) Properties(java.util.Properties) TCTLSeparatedProtocol(org.apache.hadoop.hive.serde2.thrift.TCTLSeparatedProtocol)

Example 93 with BytesWritable

use of org.apache.hadoop.io.BytesWritable in project hive by apache.

From the class TestDynamicSerDe, method testConfigurableTCTLSeparated.

/**
 * Tests TCTLSeparatedProtocol with custom, configurable delimiters
 * (tab for fields, control characters for collections/map keys), verifying
 * both the exact serialized text and a full round trip.
 */
public void testConfigurableTCTLSeparated() throws Throwable {
    try {
        // Try to construct an object
        ArrayList<String> bye = new ArrayList<String>();
        bye.add("firstString");
        bye.add("secondString");
        LinkedHashMap<String, Integer> another = new LinkedHashMap<String, Integer>();
        another.put("firstKey", 1);
        another.put("secondKey", 2);
        ArrayList<Object> struct = new ArrayList<Object>();
        struct.add(Integer.valueOf(234));
        struct.add(bye);
        struct.add(another);
        Properties schema = new Properties();
        schema.setProperty(serdeConstants.SERIALIZATION_FORMAT, org.apache.hadoop.hive.serde2.thrift.TCTLSeparatedProtocol.class.getName());
        schema.setProperty(org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.META_TABLE_NAME, "test");
        schema.setProperty(serdeConstants.SERIALIZATION_DDL, "struct test { i32 hello, list<string> bye, map<string,i32> another}");
        schema.setProperty(serdeConstants.SERIALIZATION_LIB, new DynamicSerDe().getClass().toString());
        // Custom delimiters, given as decimal character codes.
        schema.setProperty(serdeConstants.FIELD_DELIM, "9");
        schema.setProperty(serdeConstants.COLLECTION_DELIM, "1");
        schema.setProperty(serdeConstants.LINE_DELIM, "2");
        schema.setProperty(serdeConstants.MAPKEY_DELIM, "4");
        DynamicSerDe serde = new DynamicSerDe();
        serde.initialize(new Configuration(), schema);
        TCTLSeparatedProtocol prot = (TCTLSeparatedProtocol) serde.oprot_;
        assertTrue(prot.getPrimarySeparator().equals("\u0009"));
        ObjectInspector oi = serde.getObjectInspector();
        // Try to serialize
        BytesWritable bytes = (BytesWritable) serde.serialize(struct, oi);
        hexString(bytes);
        // Expected wire format with the delimiters configured above.
        String compare = "234" + "\u0009" + "firstString" + "\u0001" + "secondString" + "\u0009" + "firstKey" + "\u0004" + "1" + "\u0001" + "secondKey" + "\u0004" + "2";
        // Use getBytes()/getLength(); BytesWritable.get()/getSize() are deprecated
        // aliases with identical semantics.
        System.out.println("bytes in text =" + new String(bytes.getBytes(), 0, bytes.getLength()) + ">");
        System.out.println("compare to    =" + compare + ">");
        assertTrue(compare.equals(new String(bytes.getBytes(), 0, bytes.getLength())));
        // Try to deserialize
        Object o = serde.deserialize(bytes);
        System.out.println("o class = " + o.getClass());
        List<?> olist = (List<?>) o;
        System.out.println("o size = " + olist.size());
        System.out.println("o[0] class = " + olist.get(0).getClass());
        System.out.println("o[1] class = " + olist.get(1).getClass());
        System.out.println("o[2] class = " + olist.get(2).getClass());
        System.out.println("o = " + o);
        assertEquals(o, struct);
    } catch (Throwable e) {
        e.printStackTrace();
        throw e;
    }
}
Also used : ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) Configuration(org.apache.hadoop.conf.Configuration) ArrayList(java.util.ArrayList) BytesWritable(org.apache.hadoop.io.BytesWritable) Properties(java.util.Properties) LinkedHashMap(java.util.LinkedHashMap) ArrayList(java.util.ArrayList) List(java.util.List) TCTLSeparatedProtocol(org.apache.hadoop.hive.serde2.thrift.TCTLSeparatedProtocol)

Example 94 with BytesWritable

use of org.apache.hadoop.io.BytesWritable in project hive by apache.

From the class VerifyFast, method doVerifyDeserializeRead.

/**
 * Verifies that the field just read by a DeserializeRead matches the expected
 * value, which is supplied as a standard Hive/Hadoop Writable object.
 *
 * Only PRIMITIVE categories are handled here; complex categories (LIST, MAP,
 * STRUCT, UNION) must be verified separately and cause an Error.
 *
 * @param deserializeRead reader whose current* fields hold the value just read
 * @param typeInfo        Hive type of the field being verified
 * @param object          expected value as a Writable, or null if the field
 *                        is expected to be null
 * @param isNull          whether the reader reported the field as null
 * @throws IOException if reading the field data fails
 */
public static void doVerifyDeserializeRead(DeserializeRead deserializeRead, TypeInfo typeInfo, Object object, boolean isNull) throws IOException {
    // The reader's null report and the expected object must agree.
    if (isNull) {
        if (object != null) {
            TestCase.fail("Field reports null but object is not null (class " + object.getClass().getName() + ", " + object.toString() + ")");
        }
        return;
    } else if (object == null) {
        TestCase.fail("Field report not null but object is null");
    }
    switch(typeInfo.getCategory()) {
        case PRIMITIVE:
            {
                PrimitiveTypeInfo primitiveTypeInfo = (PrimitiveTypeInfo) typeInfo;
                switch(primitiveTypeInfo.getPrimitiveCategory()) {
                    case BOOLEAN:
                        {
                            boolean value = deserializeRead.currentBoolean;
                            if (!(object instanceof BooleanWritable)) {
                                TestCase.fail("Boolean expected writable not Boolean");
                            }
                            boolean expected = ((BooleanWritable) object).get();
                            if (value != expected) {
                                TestCase.fail("Boolean field mismatch (expected " + expected + " found " + value + ")");
                            }
                        }
                        break;
                    case BYTE:
                        {
                            byte value = deserializeRead.currentByte;
                            if (!(object instanceof ByteWritable)) {
                                TestCase.fail("Byte expected writable not Byte");
                            }
                            byte expected = ((ByteWritable) object).get();
                            if (value != expected) {
                                TestCase.fail("Byte field mismatch (expected " + (int) expected + " found " + (int) value + ")");
                            }
                        }
                        break;
                    case SHORT:
                        {
                            short value = deserializeRead.currentShort;
                            if (!(object instanceof ShortWritable)) {
                                TestCase.fail("Short expected writable not Short");
                            }
                            short expected = ((ShortWritable) object).get();
                            if (value != expected) {
                                TestCase.fail("Short field mismatch (expected " + expected + " found " + value + ")");
                            }
                        }
                        break;
                    case INT:
                        {
                            int value = deserializeRead.currentInt;
                            if (!(object instanceof IntWritable)) {
                                TestCase.fail("Integer expected writable not Integer");
                            }
                            int expected = ((IntWritable) object).get();
                            if (value != expected) {
                                TestCase.fail("Int field mismatch (expected " + expected + " found " + value + ")");
                            }
                        }
                        break;
                    case LONG:
                        {
                            long value = deserializeRead.currentLong;
                            if (!(object instanceof LongWritable)) {
                                TestCase.fail("Long expected writable not Long");
                            }
                            // Primitive long (was boxed Long) — avoids needless autoboxing
                            // and matches every other primitive case.
                            long expected = ((LongWritable) object).get();
                            if (value != expected) {
                                TestCase.fail("Long field mismatch (expected " + expected + " found " + value + ")");
                            }
                        }
                        break;
                    case FLOAT:
                        {
                            float value = deserializeRead.currentFloat;
                            if (!(object instanceof FloatWritable)) {
                                TestCase.fail("Float expected writable not Float");
                            }
                            float expected = ((FloatWritable) object).get();
                            if (value != expected) {
                                TestCase.fail("Float field mismatch (expected " + expected + " found " + value + ")");
                            }
                        }
                        break;
                    case DOUBLE:
                        {
                            double value = deserializeRead.currentDouble;
                            if (!(object instanceof DoubleWritable)) {
                                TestCase.fail("Double expected writable not Double");
                            }
                            double expected = ((DoubleWritable) object).get();
                            if (value != expected) {
                                TestCase.fail("Double field mismatch (expected " + expected + " found " + value + ")");
                            }
                        }
                        break;
                    case STRING:
                        {
                            // Copy out the current bytes (the reader's buffer may be reused).
                            byte[] stringBytes = Arrays.copyOfRange(deserializeRead.currentBytes, deserializeRead.currentBytesStart, deserializeRead.currentBytesStart + deserializeRead.currentBytesLength);
                            Text text = new Text(stringBytes);
                            String string = text.toString();
                            String expected = ((Text) object).toString();
                            if (!string.equals(expected)) {
                                TestCase.fail("String field mismatch (expected '" + expected + "' found '" + string + "')");
                            }
                        }
                        break;
                    case CHAR:
                        {
                            byte[] stringBytes = Arrays.copyOfRange(deserializeRead.currentBytes, deserializeRead.currentBytesStart, deserializeRead.currentBytesStart + deserializeRead.currentBytesLength);
                            Text text = new Text(stringBytes);
                            String string = text.toString();
                            HiveChar hiveChar = new HiveChar(string, ((CharTypeInfo) primitiveTypeInfo).getLength());
                            HiveChar expected = ((HiveCharWritable) object).getHiveChar();
                            if (!hiveChar.equals(expected)) {
                                TestCase.fail("Char field mismatch (expected '" + expected + "' found '" + hiveChar + "')");
                            }
                        }
                        break;
                    case VARCHAR:
                        {
                            byte[] stringBytes = Arrays.copyOfRange(deserializeRead.currentBytes, deserializeRead.currentBytesStart, deserializeRead.currentBytesStart + deserializeRead.currentBytesLength);
                            Text text = new Text(stringBytes);
                            String string = text.toString();
                            HiveVarchar hiveVarchar = new HiveVarchar(string, ((VarcharTypeInfo) primitiveTypeInfo).getLength());
                            HiveVarchar expected = ((HiveVarcharWritable) object).getHiveVarchar();
                            if (!hiveVarchar.equals(expected)) {
                                TestCase.fail("Varchar field mismatch (expected '" + expected + "' found '" + hiveVarchar + "')");
                            }
                        }
                        break;
                    case DECIMAL:
                        {
                            HiveDecimal value = deserializeRead.currentHiveDecimalWritable.getHiveDecimal();
                            if (value == null) {
                                TestCase.fail("Decimal field evaluated to NULL");
                            }
                            HiveDecimal expected = ((HiveDecimalWritable) object).getHiveDecimal();
                            if (!value.equals(expected)) {
                                // Include precision/scale in the failure message for diagnosis.
                                DecimalTypeInfo decimalTypeInfo = (DecimalTypeInfo) primitiveTypeInfo;
                                int precision = decimalTypeInfo.getPrecision();
                                int scale = decimalTypeInfo.getScale();
                                TestCase.fail("Decimal field mismatch (expected " + expected.toString() + " found " + value.toString() + ") precision " + precision + ", scale " + scale);
                            }
                        }
                        break;
                    case DATE:
                        {
                            Date value = deserializeRead.currentDateWritable.get();
                            Date expected = ((DateWritable) object).get();
                            if (!value.equals(expected)) {
                                TestCase.fail("Date field mismatch (expected " + expected.toString() + " found " + value.toString() + ")");
                            }
                        }
                        break;
                    case TIMESTAMP:
                        {
                            Timestamp value = deserializeRead.currentTimestampWritable.getTimestamp();
                            Timestamp expected = ((TimestampWritable) object).getTimestamp();
                            if (!value.equals(expected)) {
                                TestCase.fail("Timestamp field mismatch (expected " + expected.toString() + " found " + value.toString() + ")");
                            }
                        }
                        break;
                    case INTERVAL_YEAR_MONTH:
                        {
                            HiveIntervalYearMonth value = deserializeRead.currentHiveIntervalYearMonthWritable.getHiveIntervalYearMonth();
                            HiveIntervalYearMonth expected = ((HiveIntervalYearMonthWritable) object).getHiveIntervalYearMonth();
                            if (!value.equals(expected)) {
                                TestCase.fail("HiveIntervalYearMonth field mismatch (expected " + expected.toString() + " found " + value.toString() + ")");
                            }
                        }
                        break;
                    case INTERVAL_DAY_TIME:
                        {
                            HiveIntervalDayTime value = deserializeRead.currentHiveIntervalDayTimeWritable.getHiveIntervalDayTime();
                            HiveIntervalDayTime expected = ((HiveIntervalDayTimeWritable) object).getHiveIntervalDayTime();
                            if (!value.equals(expected)) {
                                TestCase.fail("HiveIntervalDayTime field mismatch (expected " + expected.toString() + " found " + value.toString() + ")");
                            }
                        }
                        break;
                    case BINARY:
                        {
                            // Compare the raw byte ranges; copy both so lengths are exact.
                            byte[] byteArray = Arrays.copyOfRange(deserializeRead.currentBytes, deserializeRead.currentBytesStart, deserializeRead.currentBytesStart + deserializeRead.currentBytesLength);
                            BytesWritable bytesWritable = (BytesWritable) object;
                            byte[] expected = Arrays.copyOfRange(bytesWritable.getBytes(), 0, bytesWritable.getLength());
                            if (byteArray.length != expected.length) {
                                TestCase.fail("Byte Array field mismatch (expected " + Arrays.toString(expected) + " found " + Arrays.toString(byteArray) + ")");
                            }
                            for (int b = 0; b < byteArray.length; b++) {
                                if (byteArray[b] != expected[b]) {
                                    TestCase.fail("Byte Array field mismatch (expected " + Arrays.toString(expected) + " found " + Arrays.toString(byteArray) + ")");
                                }
                            }
                        }
                        break;
                    default:
                        throw new Error("Unknown primitive category " + primitiveTypeInfo.getPrimitiveCategory());
                }
            }
            break;
        case LIST:
        case MAP:
        case STRUCT:
        case UNION:
            throw new Error("Complex types need to be handled separately");
        default:
            throw new Error("Unknown category " + typeInfo.getCategory());
    }
}
Also used : HiveChar(org.apache.hadoop.hive.common.type.HiveChar) TimestampWritable(org.apache.hadoop.hive.serde2.io.TimestampWritable) DoubleWritable(org.apache.hadoop.hive.serde2.io.DoubleWritable) ShortWritable(org.apache.hadoop.hive.serde2.io.ShortWritable) Timestamp(java.sql.Timestamp) PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo) HiveDecimal(org.apache.hadoop.hive.common.type.HiveDecimal) LongWritable(org.apache.hadoop.io.LongWritable) ByteWritable(org.apache.hadoop.hive.serde2.io.ByteWritable) IntWritable(org.apache.hadoop.io.IntWritable) HiveIntervalDayTime(org.apache.hadoop.hive.common.type.HiveIntervalDayTime) HiveDecimalWritable(org.apache.hadoop.hive.serde2.io.HiveDecimalWritable) DateWritable(org.apache.hadoop.hive.serde2.io.DateWritable) HiveCharWritable(org.apache.hadoop.hive.serde2.io.HiveCharWritable) HiveVarcharWritable(org.apache.hadoop.hive.serde2.io.HiveVarcharWritable) Text(org.apache.hadoop.io.Text) HiveIntervalDayTimeWritable(org.apache.hadoop.hive.serde2.io.HiveIntervalDayTimeWritable) BytesWritable(org.apache.hadoop.io.BytesWritable) HiveVarchar(org.apache.hadoop.hive.common.type.HiveVarchar) HiveIntervalYearMonthWritable(org.apache.hadoop.hive.serde2.io.HiveIntervalYearMonthWritable) Date(java.sql.Date) DecimalTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo) FloatWritable(org.apache.hadoop.io.FloatWritable) HiveIntervalYearMonth(org.apache.hadoop.hive.common.type.HiveIntervalYearMonth) BooleanWritable(org.apache.hadoop.io.BooleanWritable)

Example 95 with BytesWritable

use of org.apache.hadoop.io.BytesWritable in project hive by apache.

From the class TestBinarySortableFast, method testBinarySortableFast.

/**
 * Exercises the fast BinarySortable SerializeWrite/DeserializeRead path
 * against the regular BinarySortable SerDe, verifying that:
 * (1) serialized rows sort in the requested order,
 * (2) DeserializeRead round-trips what SerializeWrite produced,
 * (3) truncating the last byte raises EOFException on the final written field,
 * (4) the SerDe and SerializeWrite produce byte-identical output, and
 * (5) DeserializeRead can read what the SerDe serialized.
 *
 * @param doWriteFewerColumns when true, serialization writes only the columns
 *        of writeRowOI and the remainder must deserialize as nulls
 * @param useIncludeColumns when true, a random subset of columns is skipped
 *        during deserialization via skipNextField()
 */
private void testBinarySortableFast(SerdeRandomRowSource source, Object[][] rows, boolean[] columnSortOrderIsDesc, byte[] columnNullMarker, byte[] columnNotNullMarker, AbstractSerDe serde, StructObjectInspector rowOI, AbstractSerDe serde_fewer, StructObjectInspector writeRowOI, boolean ascending, TypeInfo[] typeInfos, boolean useIncludeColumns, boolean doWriteFewerColumns, Random r) throws Throwable {
    int rowCount = rows.length;
    int columnCount = typeInfos.length;
    boolean[] columnsToInclude = null;
    if (useIncludeColumns) {
        // Randomly choose which columns to read back; the rest are skipped.
        columnsToInclude = new boolean[columnCount];
        for (int i = 0; i < columnCount; i++) {
            columnsToInclude[i] = r.nextBoolean();
        }
    }
    int writeColumnCount = columnCount;
    if (doWriteFewerColumns) {
        writeColumnCount = writeRowOI.getAllStructFieldRefs().size();
    }
    BinarySortableSerializeWrite binarySortableSerializeWrite = new BinarySortableSerializeWrite(columnSortOrderIsDesc, columnNullMarker, columnNotNullMarker);
    // Try to serialize
    // One Writable per row.
    BytesWritable[] serializeWriteBytes = new BytesWritable[rowCount];
    // Cumulative output length after each field, per row (used for diagnostics).
    int[][] perFieldWriteLengthsArray = new int[rowCount][];
    for (int i = 0; i < rowCount; i++) {
        Object[] row = rows[i];
        Output output = new Output();
        binarySortableSerializeWrite.set(output);
        int[] perFieldWriteLengths = new int[columnCount];
        for (int index = 0; index < writeColumnCount; index++) {
            VerifyFast.serializeWrite(binarySortableSerializeWrite, typeInfos[index], row[index]);
            perFieldWriteLengths[index] = output.getLength();
        }
        perFieldWriteLengthsArray[i] = perFieldWriteLengths;
        BytesWritable bytesWritable = new BytesWritable();
        bytesWritable.set(output.getData(), 0, output.getLength());
        serializeWriteBytes[i] = bytesWritable;
        if (i > 0) {
            // Rows were generated in sorted order; the serialized bytes must
            // compare in the same (ascending or descending) order.
            BytesWritable previousBytesWritable = serializeWriteBytes[i - 1];
            int compareResult = previousBytesWritable.compareTo(bytesWritable);
            if ((compareResult < 0 && !ascending) || (compareResult > 0 && ascending)) {
                System.out.println("Test failed in " + (ascending ? "ascending" : "descending") + " order with " + (i - 1) + " and " + i);
                System.out.println("serialized data [" + (i - 1) + "] = " + TestBinarySortableSerDe.hexString(serializeWriteBytes[i - 1]));
                System.out.println("serialized data [" + i + "] = " + TestBinarySortableSerDe.hexString(serializeWriteBytes[i]));
                fail("Sort order of serialized " + (i - 1) + " and " + i + " are reversed!");
            }
        }
    }
    // Try to deserialize using DeserializeRead our Writable row objects created by SerializeWrite.
    for (int i = 0; i < rowCount; i++) {
        Object[] row = rows[i];
        BinarySortableDeserializeRead binarySortableDeserializeRead = new BinarySortableDeserializeRead(typeInfos, /* useExternalBuffer */
        false, columnSortOrderIsDesc, columnNullMarker, columnNotNullMarker);
        BytesWritable bytesWritable = serializeWriteBytes[i];
        binarySortableDeserializeRead.set(bytesWritable.getBytes(), 0, bytesWritable.getLength());
        for (int index = 0; index < columnCount; index++) {
            if (useIncludeColumns && !columnsToInclude[index]) {
                binarySortableDeserializeRead.skipNextField();
            } else if (index >= writeColumnCount) {
                // Should come back a null.
                VerifyFast.verifyDeserializeRead(binarySortableDeserializeRead, typeInfos[index], null);
            } else {
                verifyRead(binarySortableDeserializeRead, typeInfos[index], row[index]);
            }
        }
        if (writeColumnCount == columnCount) {
            TestCase.assertTrue(binarySortableDeserializeRead.isEndOfInputReached());
        }
        /*
       * Clip off one byte and expect to get an EOFException on the write field.
       */
        BinarySortableDeserializeRead binarySortableDeserializeRead2 = new BinarySortableDeserializeRead(typeInfos, /* useExternalBuffer */
        false, columnSortOrderIsDesc, columnNullMarker, columnNotNullMarker);
        binarySortableDeserializeRead2.set(bytesWritable.getBytes(), 0, // One fewer byte.
        bytesWritable.getLength() - 1);
        for (int index = 0; index < writeColumnCount; index++) {
            if (index == writeColumnCount - 1) {
                // The truncation must surface as an EOFException on the last
                // written field (unless that field was null to begin with).
                boolean threw = false;
                try {
                    verifyRead(binarySortableDeserializeRead2, typeInfos[index], row[index]);
                } catch (EOFException e) {
                    // debugDetailedReadPositionString = binarySortableDeserializeRead2.getDetailedReadPositionString();
                    // debugStackTrace = e.getStackTrace();
                    threw = true;
                }
                if (!threw && row[index] != null) {
                    Assert.fail();
                }
            } else {
                if (useIncludeColumns && !columnsToInclude[index]) {
                    binarySortableDeserializeRead2.skipNextField();
                } else {
                    verifyRead(binarySortableDeserializeRead2, typeInfos[index], row[index]);
                }
            }
        }
    }
    // Try to deserialize using SerDe class our Writable row objects created by SerializeWrite.
    for (int i = 0; i < rowCount; i++) {
        BytesWritable bytesWritable = serializeWriteBytes[i];
        // Note that regular SerDe doesn't tolerate fewer columns.
        List<Object> deserializedRow;
        if (doWriteFewerColumns) {
            deserializedRow = (List<Object>) serde_fewer.deserialize(bytesWritable);
        } else {
            deserializedRow = (List<Object>) serde.deserialize(bytesWritable);
        }
        Object[] row = rows[i];
        for (int index = 0; index < writeColumnCount; index++) {
            Object expected = row[index];
            Object object = deserializedRow.get(index);
            if (expected == null || object == null) {
                if (expected != null || object != null) {
                    fail("SerDe deserialized NULL column mismatch");
                }
            } else {
                if (!object.equals(expected)) {
                    fail("SerDe deserialized value does not match (expected " + expected.getClass().getName() + " " + expected.toString() + ", actual " + object.getClass().getName() + " " + object.toString() + ")");
                }
            }
        }
    }
    // One Writable per row.
    BytesWritable[] serdeBytes = new BytesWritable[rowCount];
    // Serialize using the SerDe, then below deserialize using DeserializeRead.
    for (int i = 0; i < rowCount; i++) {
        Object[] row = rows[i];
        // Since SerDe reuses memory, we will need to make a copy.
        BytesWritable serialized;
        if (doWriteFewerColumns) {
            serialized = (BytesWritable) serde_fewer.serialize(row, rowOI);
        } else {
            // (removed a stray empty statement ';' that followed this call)
            serialized = (BytesWritable) serde.serialize(row, rowOI);
        }
        BytesWritable bytesWritable = new BytesWritable();
        bytesWritable.set(serialized);
        byte[] serDeOutput = Arrays.copyOfRange(bytesWritable.getBytes(), 0, bytesWritable.getLength());
        byte[] serializeWriteExpected = Arrays.copyOfRange(serializeWriteBytes[i].getBytes(), 0, serializeWriteBytes[i].getLength());
        if (!Arrays.equals(serDeOutput, serializeWriteExpected)) {
            // Diagnose the mismatch: report lengths, first differing position,
            // and per-field byte spans.
            int mismatchPos = -1;
            if (serDeOutput.length != serializeWriteExpected.length) {
                for (int b = 0; b < Math.min(serDeOutput.length, serializeWriteExpected.length); b++) {
                    if (serDeOutput[b] != serializeWriteExpected[b]) {
                        mismatchPos = b;
                        break;
                    }
                }
                fail("Different byte array lengths: serDeOutput.length " + serDeOutput.length + ", serializeWriteExpected.length " + serializeWriteExpected.length + " mismatchPos " + mismatchPos + " perFieldWriteLengths " + Arrays.toString(perFieldWriteLengthsArray[i]));
            }
            // Parameterized type (was a raw ArrayList).
            List<Integer> differentPositions = new ArrayList<Integer>();
            for (int b = 0; b < serDeOutput.length; b++) {
                if (serDeOutput[b] != serializeWriteExpected[b]) {
                    differentPositions.add(b);
                }
            }
            if (differentPositions.size() > 0) {
                List<String> serializeWriteExpectedFields = new ArrayList<String>();
                List<String> serDeFields = new ArrayList<String>();
                int f = 0;
                int lastBegin = 0;
                for (int b = 0; b < serDeOutput.length; b++) {
                    int writeLength = perFieldWriteLengthsArray[i][f];
                    if (b + 1 == writeLength) {
                        serializeWriteExpectedFields.add(displayBytes(serializeWriteExpected, lastBegin, writeLength - lastBegin));
                        serDeFields.add(displayBytes(serDeOutput, lastBegin, writeLength - lastBegin));
                        f++;
                        lastBegin = b + 1;
                    }
                }
                fail("SerializeWrite and SerDe serialization does not match at positions " + differentPositions.toString() + "\n(SerializeWrite: " + serializeWriteExpectedFields.toString() + "\nSerDe: " + serDeFields.toString() + "\nperFieldWriteLengths " + Arrays.toString(perFieldWriteLengthsArray[i]) + "\nprimitiveTypeInfos " + Arrays.toString(typeInfos) + "\nrow " + Arrays.toString(row));
            }
        }
        serdeBytes[i] = bytesWritable;
    }
    // Try to deserialize using DeserializeRead our Writable row objects created by SerDe.
    for (int i = 0; i < rowCount; i++) {
        Object[] row = rows[i];
        BinarySortableDeserializeRead binarySortableDeserializeRead = new BinarySortableDeserializeRead(typeInfos, /* useExternalBuffer */
        false, columnSortOrderIsDesc, columnNullMarker, columnNotNullMarker);
        BytesWritable bytesWritable = serdeBytes[i];
        binarySortableDeserializeRead.set(bytesWritable.getBytes(), 0, bytesWritable.getLength());
        for (int index = 0; index < columnCount; index++) {
            if (useIncludeColumns && !columnsToInclude[index]) {
                binarySortableDeserializeRead.skipNextField();
            } else if (index >= writeColumnCount) {
                // Should come back a null.
                verifyRead(binarySortableDeserializeRead, typeInfos[index], null);
            } else {
                verifyRead(binarySortableDeserializeRead, typeInfos[index], row[index]);
            }
        }
        if (writeColumnCount == columnCount) {
            TestCase.assertTrue(binarySortableDeserializeRead.isEndOfInputReached());
        }
    }
}
Also used : BinarySortableDeserializeRead(org.apache.hadoop.hive.serde2.binarysortable.fast.BinarySortableDeserializeRead) ArrayList(java.util.ArrayList) BytesWritable(org.apache.hadoop.io.BytesWritable) BinarySortableSerializeWrite(org.apache.hadoop.hive.serde2.binarysortable.fast.BinarySortableSerializeWrite) Output(org.apache.hadoop.hive.serde2.ByteStream.Output) EOFException(java.io.EOFException) UnionObject(org.apache.hadoop.hive.serde2.objectinspector.UnionObject)

Aggregations

BytesWritable (org.apache.hadoop.io.BytesWritable)275 Text (org.apache.hadoop.io.Text)73 LongWritable (org.apache.hadoop.io.LongWritable)59 Test (org.junit.Test)53 ObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector)46 IntWritable (org.apache.hadoop.io.IntWritable)44 ArrayList (java.util.ArrayList)39 Path (org.apache.hadoop.fs.Path)38 IOException (java.io.IOException)36 Configuration (org.apache.hadoop.conf.Configuration)33 FloatWritable (org.apache.hadoop.io.FloatWritable)33 Writable (org.apache.hadoop.io.Writable)32 BooleanWritable (org.apache.hadoop.io.BooleanWritable)31 List (java.util.List)30 SequenceFile (org.apache.hadoop.io.SequenceFile)27 Random (java.util.Random)24 DoubleWritable (org.apache.hadoop.hive.serde2.io.DoubleWritable)24 ShortWritable (org.apache.hadoop.hive.serde2.io.ShortWritable)23 ByteWritable (org.apache.hadoop.hive.serde2.io.ByteWritable)22 FileSystem (org.apache.hadoop.fs.FileSystem)21