Search in sources :

Example 1 with DynamicSerDe

use of org.apache.hadoop.hive.serde2.dynamic_type.DynamicSerDe in project hive by apache.

the class TestDynamicSerDe method testConfigurableTCTLSeparated.

/**
 * Checks that DynamicSerDe, backed by TCTLSeparatedProtocol, honours the
 * delimiters supplied through the table properties.
 *
 * <p>The field delimiter is configured as "9" and the test asserts that the
 * protocol's primary separator is a TAB (character code 9). The collection and
 * map-key delimiters are configured as "1" and "4"; the expected text below
 * assumes they are interpreted the same way, i.e. as the control characters
 * U+0001 and U+0004 (NOTE(review): confirm against TCTLSeparatedProtocol).
 *
 * @throws Throwable if initialization, serialization or deserialization fails
 */
public void testConfigurableTCTLSeparated() throws Throwable {
    try {
        // Try to construct an object
        ArrayList<String> bye = new ArrayList<String>();
        bye.add("firstString");
        bye.add("secondString");
        // LinkedHashMap keeps insertion order, so the serialized text is predictable.
        LinkedHashMap<String, Integer> another = new LinkedHashMap<String, Integer>();
        another.put("firstKey", 1);
        another.put("secondKey", 2);
        ArrayList<Object> struct = new ArrayList<Object>();
        struct.add(Integer.valueOf(234));
        struct.add(bye);
        struct.add(another);
        Properties schema = new Properties();
        schema.setProperty(serdeConstants.SERIALIZATION_FORMAT, org.apache.hadoop.hive.serde2.thrift.TCTLSeparatedProtocol.class.getName());
        schema.setProperty(org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.META_TABLE_NAME, "test");
        schema.setProperty(serdeConstants.SERIALIZATION_DDL, "struct test { i32 hello, list<string> bye, map<string,i32> another}");
        schema.setProperty(serdeConstants.SERIALIZATION_LIB, new DynamicSerDe().getClass().toString());
        // Custom delimiters, given as decimal character codes.
        schema.setProperty(serdeConstants.FIELD_DELIM, "9");
        schema.setProperty(serdeConstants.COLLECTION_DELIM, "1");
        schema.setProperty(serdeConstants.LINE_DELIM, "2");
        schema.setProperty(serdeConstants.MAPKEY_DELIM, "4");
        DynamicSerDe serde = new DynamicSerDe();
        serde.initialize(new Configuration(), schema);
        TCTLSeparatedProtocol prot = (TCTLSeparatedProtocol) serde.oprot_;
        // FIELD_DELIM "9" must surface as a TAB primary separator.
        assertEquals("\t", prot.getPrimarySeparator());
        ObjectInspector oi = serde.getObjectInspector();
        // Try to serialize
        BytesWritable bytes = (BytesWritable) serde.serialize(struct, oi);
        hexString(bytes);
        // Separators are written as explicit escapes so they cannot be lost or
        // altered by editors, copy/paste or source re-encoding.
        String expected = "234" + "\t" + "firstString" + "\u0001" + "secondString" + "\t" + "firstKey" + "\u0004" + "1" + "\u0001" + "secondKey" + "\u0004" + "2";
        String actual = new String(bytes.get(), 0, bytes.getSize());
        System.out.println("bytes in text =" + actual + ">");
        System.out.println("compare to    =" + expected + ">");
        assertEquals(expected, actual);
        // Try to deserialize
        Object o = serde.deserialize(bytes);
        System.out.println("o class = " + o.getClass());
        List<?> olist = (List<?>) o;
        System.out.println("o size = " + olist.size());
        System.out.println("o[0] class = " + olist.get(0).getClass());
        System.out.println("o[1] class = " + olist.get(1).getClass());
        System.out.println("o[2] class = " + olist.get(2).getClass());
        System.out.println("o = " + o);
        // Expected value first, so a failure message reads the right way round.
        assertEquals(struct, o);
    } catch (Throwable e) {
        e.printStackTrace();
        throw e;
    }
}
Also used : ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) Configuration(org.apache.hadoop.conf.Configuration) ArrayList(java.util.ArrayList) BytesWritable(org.apache.hadoop.io.BytesWritable) Properties(java.util.Properties) LinkedHashMap(java.util.LinkedHashMap) ArrayList(java.util.ArrayList) List(java.util.List) TCTLSeparatedProtocol(org.apache.hadoop.hive.serde2.thrift.TCTLSeparatedProtocol)

Example 2 with DynamicSerDe

use of org.apache.hadoop.hive.serde2.dynamic_type.DynamicSerDe in project hive by apache.

the class TestDynamicSerDe method testNulls1.

/**
 * Round-trips a struct whose list field is null while the protocol's
 * return-nulls property is set to "true", and expects the deserialized
 * value to equal the input.
 *
 * @throws Throwable if initialization, serialization or deserialization fails
 */
public void testNulls1() throws Throwable {
    try {
        // Table description: TCTL-separated text with return-nulls switched on.
        Properties tableProps = new Properties();
        tableProps.setProperty(serdeConstants.SERIALIZATION_FORMAT, org.apache.hadoop.hive.serde2.thrift.TCTLSeparatedProtocol.class.getName());
        tableProps.setProperty(org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.META_TABLE_NAME, "test");
        tableProps.setProperty(serdeConstants.SERIALIZATION_DDL, "struct test { i32 hello, list<string> bye, map<string,i32> another}");
        tableProps.setProperty(serdeConstants.SERIALIZATION_LIB, new DynamicSerDe().getClass().toString());
        tableProps.setProperty(TCTLSeparatedProtocol.ReturnNullsKey, "true");

        // Row under test: the middle (list) field is deliberately null.
        HashMap<String, Integer> mapField = new HashMap<String, Integer>();
        mapField.put("firstKey", 1);
        mapField.put("secondKey", 2);
        ArrayList<String> listField = null;
        ArrayList<Object> row = new ArrayList<Object>();
        row.add(Integer.valueOf(234));
        row.add(listField);
        row.add(mapField);

        DynamicSerDe serDe = new DynamicSerDe();
        serDe.initialize(new Configuration(), tableProps);
        ObjectInspector inspector = serDe.getObjectInspector();

        // Serialize, then read the bytes straight back.
        BytesWritable serialized = (BytesWritable) serDe.serialize(row, inspector);
        hexString(serialized);
        Object roundTripped = serDe.deserialize(serialized);
        assertEquals(row, roundTripped);
    } catch (Throwable failure) {
        failure.printStackTrace();
        throw failure;
    }
}
Also used : ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) Configuration(org.apache.hadoop.conf.Configuration) HashMap(java.util.HashMap) LinkedHashMap(java.util.LinkedHashMap) ArrayList(java.util.ArrayList) BytesWritable(org.apache.hadoop.io.BytesWritable) Properties(java.util.Properties) TCTLSeparatedProtocol(org.apache.hadoop.hive.serde2.thrift.TCTLSeparatedProtocol)

Example 3 with DynamicSerDe

use of org.apache.hadoop.hive.serde2.dynamic_type.DynamicSerDe in project hive by apache.

the class TestDynamicSerDe method testNulls2.

/**
 * Round-trips a struct in which every field is null while the protocol's
 * return-nulls property is set to "true", and checks that three null fields
 * come back.
 *
 * @throws Throwable if initialization, serialization or deserialization fails
 */
public void testNulls2() throws Throwable {
    try {
        // Try to construct an object: i32, list<string> and map<string,i32> all null.
        ArrayList<Object> struct = new ArrayList<Object>();
        struct.add(null);
        struct.add(null);
        struct.add(null);
        Properties schema = new Properties();
        schema.setProperty(serdeConstants.SERIALIZATION_FORMAT, org.apache.hadoop.hive.serde2.thrift.TCTLSeparatedProtocol.class.getName());
        schema.setProperty(org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.META_TABLE_NAME, "test");
        schema.setProperty(serdeConstants.SERIALIZATION_DDL, "struct test { i32 hello, list<string> bye, map<string,i32> another}");
        schema.setProperty(serdeConstants.SERIALIZATION_LIB, new DynamicSerDe().getClass().toString());
        schema.setProperty(TCTLSeparatedProtocol.ReturnNullsKey, "true");
        DynamicSerDe serde = new DynamicSerDe();
        serde.initialize(new Configuration(), schema);
        ObjectInspector oi = serde.getObjectInspector();
        // Try to serialize
        BytesWritable bytes = (BytesWritable) serde.serialize(struct, oi);
        hexString(bytes);
        // Try to deserialize
        Object o = serde.deserialize(bytes);
        List<?> olist = (List<?>) o;
        // assertEquals reports the actual size on failure, unlike assertTrue(size == 3).
        assertEquals(3, olist.size());
        assertEquals(null, olist.get(0));
        assertEquals(null, olist.get(1));
        assertEquals(null, olist.get(2));
        // assertEquals(o, struct); Cannot do this because types of null lists are
        // wrong.
    } catch (Throwable e) {
        e.printStackTrace();
        throw e;
    }
}
Also used : ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) Configuration(org.apache.hadoop.conf.Configuration) ArrayList(java.util.ArrayList) BytesWritable(org.apache.hadoop.io.BytesWritable) Properties(java.util.Properties) ArrayList(java.util.ArrayList) List(java.util.List) TCTLSeparatedProtocol(org.apache.hadoop.hive.serde2.thrift.TCTLSeparatedProtocol)

Example 4 with DynamicSerDe

use of org.apache.hadoop.hive.serde2.dynamic_type.DynamicSerDe in project hive by apache.

the class TestDynamicSerDe method testDynamicSerDe.

/**
 * Round-trips one six-field struct through DynamicSerDe once per configured
 * Thrift protocol and expects the deserialized value to equal the input
 * each time.
 *
 * @throws Throwable if initialization, serialization or deserialization fails
 */
public void testDynamicSerDe() throws Throwable {
    try {
        // The row to round-trip: i32, list<string>, map<string,i32>, i32, double, double.
        ArrayList<String> stringList = new ArrayList<String>();
        stringList.add("firstString");
        stringList.add("secondString");
        HashMap<String, Integer> intMap = new HashMap<String, Integer>();
        intMap.put("firstKey", 1);
        intMap.put("secondKey", 2);
        ArrayList<Object> struct = new ArrayList<Object>();
        struct.add(Integer.valueOf(234));
        struct.add(stringList);
        struct.add(intMap);
        struct.add(Integer.valueOf(-234));
        struct.add(Double.valueOf(1.0));
        struct.add(Double.valueOf(-2.5));

        // Three parallel lists, one entry per protocol run: class name,
        // whether the output is binary, and optional extra table properties.
        ArrayList<String> protocols = new ArrayList<String>();
        ArrayList<Boolean> isBinaries = new ArrayList<Boolean>();
        ArrayList<HashMap<String, String>> additionalParams = new ArrayList<HashMap<String, String>>();

        // Binary-sortable with sort order "++++++".
        protocols.add(org.apache.hadoop.hive.serde2.thrift.TBinarySortableProtocol.class.getName());
        isBinaries.add(true);
        additionalParams.add(makeHashMap("serialization.sort.order", "++++++"));

        // Binary-sortable with sort order "------".
        protocols.add(org.apache.hadoop.hive.serde2.thrift.TBinarySortableProtocol.class.getName());
        isBinaries.add(true);
        additionalParams.add(makeHashMap("serialization.sort.order", "------"));

        protocols.add(org.apache.thrift.protocol.TBinaryProtocol.class.getName());
        isBinaries.add(true);
        additionalParams.add(null);

        protocols.add(org.apache.thrift.protocol.TJSONProtocol.class.getName());
        isBinaries.add(false);
        additionalParams.add(null);

        // TSimpleJSONProtocol is left out: it does not support deserialization.

        // TCTLSeparatedProtocol, registered as a text (non-binary) protocol.
        protocols.add(org.apache.hadoop.hive.serde2.thrift.TCTLSeparatedProtocol.class.getName());
        isBinaries.add(false);
        additionalParams.add(null);

        System.out.println("input struct = " + struct);
        for (int idx = 0; idx < protocols.size(); idx++) {
            String protocolName = protocols.get(idx);
            System.out.println("Testing protocol: " + protocolName);

            Properties schema = new Properties();
            schema.setProperty(serdeConstants.SERIALIZATION_FORMAT, protocolName);
            schema.setProperty(org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.META_TABLE_NAME, "test");
            schema.setProperty(serdeConstants.SERIALIZATION_DDL, "struct test { i32 _hello, list<string> 2bye, map<string,i32> another, i32 nhello, double d, double nd}");
            schema.setProperty(serdeConstants.SERIALIZATION_LIB, new DynamicSerDe().getClass().toString());

            // Layer any protocol-specific properties on top of the common ones.
            HashMap<String, String> extras = additionalParams.get(idx);
            if (extras != null) {
                for (String key : extras.keySet()) {
                    schema.setProperty(key, extras.get(key));
                }
            }

            DynamicSerDe serde = new DynamicSerDe();
            serde.initialize(new Configuration(), schema);

            ObjectInspector inspector = serde.getObjectInspector();
            System.out.println("TypeName = " + inspector.getTypeName());

            BytesWritable serialized = (BytesWritable) serde.serialize(struct, inspector);
            System.out.println("bytes =" + hexString(serialized));
            // Only text protocols are worth echoing as a string.
            if (!isBinaries.get(idx)) {
                System.out.println("bytes in text =" + new String(serialized.get(), 0, serialized.getSize()));
            }

            Object roundTripped = serde.deserialize(serialized);
            System.out.println("o class = " + roundTripped.getClass());
            List<?> fields = (List<?>) roundTripped;
            System.out.println("o size = " + fields.size());
            System.out.println("o[0] class = " + fields.get(0).getClass());
            System.out.println("o[1] class = " + fields.get(1).getClass());
            System.out.println("o[2] class = " + fields.get(2).getClass());
            System.out.println("o = " + roundTripped);
            assertEquals(struct, roundTripped);
        }
    } catch (Throwable failure) {
        failure.printStackTrace();
        throw failure;
    }
}
Also used : Configuration(org.apache.hadoop.conf.Configuration) HashMap(java.util.HashMap) LinkedHashMap(java.util.LinkedHashMap) ArrayList(java.util.ArrayList) Properties(java.util.Properties) ArrayList(java.util.ArrayList) List(java.util.List) ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) BytesWritable(org.apache.hadoop.io.BytesWritable) TCTLSeparatedProtocol(org.apache.hadoop.hive.serde2.thrift.TCTLSeparatedProtocol)

Example 5 with DynamicSerDe

use of org.apache.hadoop.hive.serde2.dynamic_type.DynamicSerDe in project hive by apache.

the class TestDynamicSerDe method testStructsinStructs.

/**
 * Round-trips a struct that contains another struct as its first field,
 * using TBinaryProtocol, and checks each deserialized field against the
 * value that was serialized.
 *
 * @throws Throwable if initialization, serialization or deserialization fails
 */
public void testStructsinStructs() throws Throwable {
    try {
        Properties schema = new Properties();
        // schema.setProperty(serdeConstants.SERIALIZATION_FORMAT,
        // org.apache.thrift.protocol.TJSONProtocol.class.getName());
        schema.setProperty(serdeConstants.SERIALIZATION_FORMAT, org.apache.thrift.protocol.TBinaryProtocol.class.getName());
        schema.setProperty(org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.META_TABLE_NAME, "test");
        // The DDL declares "inner" first so that "test" can use it as a field type.
        schema.setProperty(serdeConstants.SERIALIZATION_DDL, "struct inner { i32 field1, string field2 },struct  test {inner foo,  i32 hello, list<string> bye, map<string,i32> another}");
        schema.setProperty(serdeConstants.SERIALIZATION_LIB, new DynamicSerDe().getClass().toString());
        //
        // construct object of above type
        //
        // construct the inner struct
        ArrayList<Object> innerStruct = new ArrayList<Object>();
        // valueOf / a plain literal instead of the deprecated boxing and copying constructors.
        innerStruct.add(Integer.valueOf(22));
        innerStruct.add("hello world");
        // construct outer struct
        ArrayList<String> bye = new ArrayList<String>();
        bye.add("firstString");
        bye.add("secondString");
        HashMap<String, Integer> another = new HashMap<String, Integer>();
        another.put("firstKey", 1);
        another.put("secondKey", 2);
        ArrayList<Object> struct = new ArrayList<Object>();
        struct.add(innerStruct);
        struct.add(Integer.valueOf(234));
        struct.add(bye);
        struct.add(another);
        DynamicSerDe serde = new DynamicSerDe();
        serde.initialize(new Configuration(), schema);
        ObjectInspector oi = serde.getObjectInspector();
        // Try to serialize
        BytesWritable bytes = (BytesWritable) serde.serialize(struct, oi);
        // Try to deserialize
        Object o = serde.deserialize(bytes);
        List<?> olist = (List<?>) o;
        assertEquals(4, olist.size());
        assertEquals(innerStruct, olist.get(0));
        assertEquals(Integer.valueOf(234), olist.get(1));
        assertEquals(bye, olist.get(2));
        assertEquals(another, olist.get(3));
    } catch (Throwable e) {
        e.printStackTrace();
        throw e;
    }
}
Also used : ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) Configuration(org.apache.hadoop.conf.Configuration) HashMap(java.util.HashMap) LinkedHashMap(java.util.LinkedHashMap) ArrayList(java.util.ArrayList) BytesWritable(org.apache.hadoop.io.BytesWritable) Properties(java.util.Properties) ArrayList(java.util.ArrayList) List(java.util.List)

Aggregations

ArrayList (java.util.ArrayList): 9, Properties (java.util.Properties): 9, Configuration (org.apache.hadoop.conf.Configuration): 9, ObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector): 9, BytesWritable (org.apache.hadoop.io.BytesWritable): 9, List (java.util.List): 8, TCTLSeparatedProtocol (org.apache.hadoop.hive.serde2.thrift.TCTLSeparatedProtocol): 7, LinkedHashMap (java.util.LinkedHashMap): 6, HashMap (java.util.HashMap): 4, Map (java.util.Map): 2, Path (org.apache.hadoop.fs.Path): 1, Order (org.apache.hadoop.hive.metastore.api.Order): 1, HiveStorageHandler (org.apache.hadoop.hive.ql.metadata.HiveStorageHandler): 1, Table (org.apache.hadoop.hive.ql.metadata.Table): 1, LazySimpleSerDe (org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe): 1