Search in sources :

Example 6 with DynamicSerDe

use of org.apache.hadoop.hive.serde2.dynamic_type.DynamicSerDe in project hive by apache.

the class TestDynamicSerDe method testTBinarySortableProtocol.

/**
 * Serializes each element of {@code structs} with a {@code DynamicSerDe} configured for
 * {@code TBinarySortableProtocol}, checks that consecutive serialized forms compare in the
 * requested direction, and then checks that every element survives a round trip.
 *
 * @param structs   rows to serialize, given in ascending logical order when {@code ascending}
 *                  is true and descending order otherwise; the last element must be a
 *                  {@code List} because its size determines the number of sort-order flags
 * @param ddl       value used for the {@code SERIALIZATION_DDL} property
 * @param ascending true to sort every field with "+", false to sort every field with "-"
 * @throws Throwable if the serde fails or an assertion does not hold
 */
private void testTBinarySortableProtocol(Object[] structs, String ddl, boolean ascending) throws Throwable {
    // One direction flag per field; the field count comes from the last struct.
    int fieldCount = ((List<?>) structs[structs.length - 1]).size();
    StringBuilder sortOrder = new StringBuilder();
    for (int f = 0; f < fieldCount; f++) {
        sortOrder.append(ascending ? "+" : "-");
    }

    Properties tableProps = new Properties();
    tableProps.setProperty(serdeConstants.SERIALIZATION_FORMAT, org.apache.hadoop.hive.serde2.thrift.TBinarySortableProtocol.class.getName());
    tableProps.setProperty(org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.META_TABLE_NAME, "test");
    tableProps.setProperty(serdeConstants.SERIALIZATION_DDL, ddl);
    tableProps.setProperty(serdeConstants.SERIALIZATION_LIB, DynamicSerDe.class.getName());
    tableProps.setProperty(serdeConstants.SERIALIZATION_SORT_ORDER, sortOrder.toString());

    DynamicSerDe serde = new DynamicSerDe();
    serde.initialize(new Configuration(), tableProps);
    ObjectInspector inspector = serde.getObjectInspector();

    // Serialization pass: keep a private copy of every result and compare neighbours.
    BytesWritable[] serialized = new BytesWritable[structs.length];
    for (int idx = 0; idx < structs.length; idx++) {
        // Copy into a fresh writable; presumably the serde may reuse its output object - confirm.
        BytesWritable copy = new BytesWritable();
        copy.set((BytesWritable) serde.serialize(structs[idx], inspector));
        serialized[idx] = copy;
        if (idx == 0) {
            continue;
        }
        int cmp = serialized[idx - 1].compareTo(serialized[idx]);
        // Equal encodings are tolerated; only a strictly reversed pair is an error.
        boolean outOfOrder = ascending ? cmp > 0 : cmp < 0;
        if (outOfOrder) {
            System.out.println("Test failed in " + (ascending ? "ascending" : "descending") + " order.");
            System.out.println("serialized data of " + structs[idx - 1] + " = " + hexString(serialized[idx - 1]));
            System.out.println("serialized data of " + structs[idx] + " = " + hexString(serialized[idx]));
            fail("Sort order of serialized " + structs[idx - 1] + " and " + structs[idx] + " are reversed!");
        }
    }

    // Deserialization pass: every struct must round-trip to an equal object.
    for (int idx = 0; idx < structs.length; idx++) {
        Object roundTripped = serde.deserialize(serialized[idx]);
        if (structs[idx].equals(roundTripped)) {
            continue;
        }
        // Dump diagnostics before letting assertEquals report the mismatch.
        System.out.println("structs[i] = " + structs[idx]);
        System.out.println("deserialized[i] = " + roundTripped);
        System.out.println("serialized[i] = " + hexString(serialized[idx]));
        assertEquals(structs[idx], roundTripped);
    }
}
Also used : ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) Configuration(org.apache.hadoop.conf.Configuration) BytesWritable(org.apache.hadoop.io.BytesWritable) Properties(java.util.Properties) ArrayList(java.util.ArrayList) List(java.util.List)

Example 7 with DynamicSerDe

use of org.apache.hadoop.hive.serde2.dynamic_type.DynamicSerDe in project hive by apache.

the class TestDynamicSerDe method testNulls4.

/**
 * Tests map and list null/empty with return nulls *off*.
 *
 * <p>Serializes a struct holding a null i32, an empty list and a null map through
 * {@code TCTLSeparatedProtocol} with {@code ReturnNullsKey} set to "false", then asserts that
 * deserialization produces 0, an empty list and an empty map respectively.
 *
 * @throws Throwable if the serde fails or an assertion does not hold
 */
public void testNulls4() throws Throwable {
    try {
        // Try to construct an object
        ArrayList<String> bye = new ArrayList<String>();
        HashMap<String, Integer> another = null;
        ArrayList<Object> struct = new ArrayList<Object>();
        struct.add(null);
        struct.add(bye);
        struct.add(another);
        Properties schema = new Properties();
        schema.setProperty(serdeConstants.SERIALIZATION_FORMAT, org.apache.hadoop.hive.serde2.thrift.TCTLSeparatedProtocol.class.getName());
        schema.setProperty(org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.META_TABLE_NAME, "test");
        schema.setProperty(serdeConstants.SERIALIZATION_DDL, "struct test { i32 hello, list<string> bye, map<string,i32> another}");
        // Class.getName() gives the bare class name; Class.toString() would prepend "class ".
        schema.setProperty(serdeConstants.SERIALIZATION_LIB, DynamicSerDe.class.getName());
        schema.setProperty(TCTLSeparatedProtocol.ReturnNullsKey, "false");
        DynamicSerDe serde = new DynamicSerDe();
        serde.initialize(new Configuration(), schema);
        ObjectInspector oi = serde.getObjectInspector();
        // Try to serialize
        BytesWritable bytes = (BytesWritable) serde.serialize(struct, oi);
        // Result is discarded; the call only checks that the bytes can be rendered.
        hexString(bytes);
        // Try to deserialize
        Object o = serde.deserialize(bytes);
        List<?> olist = (List<?>) o;
        assertEquals(3, olist.size());
        // With return-nulls off, the null i32 is expected back as 0.
        assertEquals(Integer.valueOf(0), olist.get(0));
        List<?> num1 = (List<?>) olist.get(1);
        assertEquals(0, num1.size());
        // ...and the null map is expected back as an empty map.
        Map<?, ?> num2 = (Map<?, ?>) olist.get(2);
        assertEquals(0, num2.size());
    // assertEquals(o, struct); Cannot do this because types of null lists are
    // wrong.
    } catch (Throwable e) {
        e.printStackTrace();
        throw e;
    }
}
Also used : ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) Configuration(org.apache.hadoop.conf.Configuration) ArrayList(java.util.ArrayList) BytesWritable(org.apache.hadoop.io.BytesWritable) Properties(java.util.Properties) ArrayList(java.util.ArrayList) List(java.util.List) HashMap(java.util.HashMap) LinkedHashMap(java.util.LinkedHashMap) Map(java.util.Map) TCTLSeparatedProtocol(org.apache.hadoop.hive.serde2.thrift.TCTLSeparatedProtocol)

Example 8 with DynamicSerDe

use of org.apache.hadoop.hive.serde2.dynamic_type.DynamicSerDe in project hive by apache.

the class TestDynamicSerDe method testNulls3.

/**
 * Tests a null i32, an empty list and a null map with return nulls on.
 *
 * <p>Serializes the struct through {@code TCTLSeparatedProtocol} with {@code ReturnNullsKey}
 * set to "true", then asserts that deserialization yields null for the i32, an empty list for
 * the list, and null for the map.
 *
 * @throws Throwable if the serde fails or an assertion does not hold
 */
public void testNulls3() throws Throwable {
    try {
        // Try to construct an object
        ArrayList<String> bye = new ArrayList<String>();
        HashMap<String, Integer> another = null;
        ArrayList<Object> struct = new ArrayList<Object>();
        struct.add(null);
        struct.add(bye);
        struct.add(another);
        Properties schema = new Properties();
        schema.setProperty(serdeConstants.SERIALIZATION_FORMAT, org.apache.hadoop.hive.serde2.thrift.TCTLSeparatedProtocol.class.getName());
        schema.setProperty(org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.META_TABLE_NAME, "test");
        schema.setProperty(serdeConstants.SERIALIZATION_DDL, "struct test { i32 hello, list<string> bye, map<string,i32> another}");
        // Class.getName() gives the bare class name; Class.toString() would prepend "class ".
        schema.setProperty(serdeConstants.SERIALIZATION_LIB, DynamicSerDe.class.getName());
        schema.setProperty(TCTLSeparatedProtocol.ReturnNullsKey, "true");
        DynamicSerDe serde = new DynamicSerDe();
        serde.initialize(new Configuration(), schema);
        ObjectInspector oi = serde.getObjectInspector();
        // Try to serialize
        BytesWritable bytes = (BytesWritable) serde.serialize(struct, oi);
        // Result is discarded; the call only checks that the bytes can be rendered.
        hexString(bytes);
        // Try to deserialize
        Object o = serde.deserialize(bytes);
        List<?> olist = (List<?>) o;
        assertEquals(3, olist.size());
        // With return-nulls on, the null i32 and the null map both come back as null,
        // while the empty list stays an empty list.
        assertNull(olist.get(0));
        assertEquals(0, ((List<?>) olist.get(1)).size());
        assertNull(olist.get(2));
    // assertEquals(o, struct); Cannot do this because types of null lists are
    // wrong.
    } catch (Throwable e) {
        e.printStackTrace();
        throw e;
    }
}
Also used : ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) Configuration(org.apache.hadoop.conf.Configuration) ArrayList(java.util.ArrayList) BytesWritable(org.apache.hadoop.io.BytesWritable) Properties(java.util.Properties) ArrayList(java.util.ArrayList) List(java.util.List) TCTLSeparatedProtocol(org.apache.hadoop.hive.serde2.thrift.TCTLSeparatedProtocol)

Example 9 with DynamicSerDe

use of org.apache.hadoop.hive.serde2.dynamic_type.DynamicSerDe in project hive by apache.

the class TestDynamicSerDe method testSkip.

/**
 * Tests that a field marked {@code skip} in the DDL is deserialized as null.
 *
 * <p>A struct (i32, list of strings, map of string to i32) is serialized through
 * {@code TCTLSeparatedProtocol} with custom delimiters given as character codes (field 9,
 * collection 1, line 2, map key 4) and the resulting text is compared with the expected
 * delimited string. The serde is then re-initialized with the list field marked {@code skip},
 * and the same bytes must deserialize to the original struct with that field set to null.
 *
 * @throws Throwable if the serde fails or an assertion does not hold
 */
public void testSkip() throws Throwable {
    try {
        // Try to construct an object
        ArrayList<String> bye = new ArrayList<String>();
        bye.add("firstString");
        bye.add("secondString");
        LinkedHashMap<String, Integer> another = new LinkedHashMap<String, Integer>();
        another.put("firstKey", 1);
        another.put("secondKey", 2);
        ArrayList<Object> struct = new ArrayList<Object>();
        struct.add(Integer.valueOf(234));
        struct.add(bye);
        struct.add(another);
        Properties schema = new Properties();
        schema.setProperty(serdeConstants.SERIALIZATION_FORMAT, org.apache.hadoop.hive.serde2.thrift.TCTLSeparatedProtocol.class.getName());
        schema.setProperty(org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.META_TABLE_NAME, "test");
        schema.setProperty(serdeConstants.SERIALIZATION_DDL, "struct test { i32 hello, list<string> bye, map<string,i32> another}");
        // Class.getName() gives the bare class name; Class.toString() would prepend "class ".
        schema.setProperty(serdeConstants.SERIALIZATION_LIB, DynamicSerDe.class.getName());
        schema.setProperty(serdeConstants.FIELD_DELIM, "9");
        schema.setProperty(serdeConstants.COLLECTION_DELIM, "1");
        schema.setProperty(serdeConstants.LINE_DELIM, "2");
        schema.setProperty(serdeConstants.MAPKEY_DELIM, "4");
        DynamicSerDe serde = new DynamicSerDe();
        serde.initialize(new Configuration(), schema);
        TCTLSeparatedProtocol prot = (TCTLSeparatedProtocol) serde.oprot_;
        // FIELD_DELIM "9" must have been turned into the character with code 9.
        assertEquals("\u0009", prot.getPrimarySeparator());
        ObjectInspector oi = serde.getObjectInspector();
        // Try to serialize
        BytesWritable bytes = (BytesWritable) serde.serialize(struct, oi);
        // Result is discarded; the call only checks that the bytes can be rendered.
        hexString(bytes);
        String compare = "234" + "\u0009" + "firstString" + "\u0001" + "secondString" + "\u0009" + "firstKey" + "\u0004" + "1" + "\u0001" + "secondKey" + "\u0004" + "2";
        // Decode once and reuse for both the diagnostic output and the assertion.
        String serializedText = new String(bytes.get(), 0, bytes.getSize());
        System.out.println("bytes in text =" + serializedText + ">");
        System.out.println("compare to    =" + compare + ">");
        assertEquals(compare, serializedText);
        // Re-initialize with the list field marked "skip" and read the same bytes back.
        schema.setProperty(serdeConstants.SERIALIZATION_DDL, "struct test { i32 hello, skip list<string> bye, map<string,i32> another}");
        serde.initialize(new Configuration(), schema);
        // Try to deserialize
        Object o = serde.deserialize(bytes);
        System.out.println("o class = " + o.getClass());
        List<?> olist = (List<?>) o;
        System.out.println("o size = " + olist.size());
        System.out.println("o = " + o);
        assertNull(olist.get(1));
        // set the skipped field to null
        struct.set(1, null);
        // Expected value first, so a failure message labels the two sides correctly.
        assertEquals(struct, o);
    } catch (Throwable e) {
        e.printStackTrace();
        throw e;
    }
}
Also used : ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) Configuration(org.apache.hadoop.conf.Configuration) ArrayList(java.util.ArrayList) BytesWritable(org.apache.hadoop.io.BytesWritable) Properties(java.util.Properties) LinkedHashMap(java.util.LinkedHashMap) ArrayList(java.util.ArrayList) List(java.util.List) TCTLSeparatedProtocol(org.apache.hadoop.hive.serde2.thrift.TCTLSeparatedProtocol)

Aggregations

ArrayList (java.util.ArrayList)9 Properties (java.util.Properties)9 Configuration (org.apache.hadoop.conf.Configuration)9 ObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector)9 BytesWritable (org.apache.hadoop.io.BytesWritable)9 List (java.util.List)8 TCTLSeparatedProtocol (org.apache.hadoop.hive.serde2.thrift.TCTLSeparatedProtocol)7 LinkedHashMap (java.util.LinkedHashMap)6 HashMap (java.util.HashMap)4 Map (java.util.Map)1