Search in sources :

Example 1 with TCTLSeparatedProtocol

Use of org.apache.hadoop.hive.serde2.thrift.TCTLSeparatedProtocol in the Apache Hive project.

From the class TestDynamicSerDe, method testDynamicSerDe.

/**
 * Round-trips a sample struct (i32, list&lt;string&gt;, map&lt;string,i32&gt;, negative
 * i32, positive and negative doubles) through DynamicSerDe under several
 * Thrift protocols and asserts that deserialization restores the input.
 */
public void testDynamicSerDe() throws Throwable {
    try {
        // Build the input row: a struct with six fields.
        ArrayList<String> bye = new ArrayList<String>();
        bye.add("firstString");
        bye.add("secondString");
        HashMap<String, Integer> another = new HashMap<String, Integer>();
        another.put("firstKey", 1);
        another.put("secondKey", 2);
        ArrayList<Object> struct = new ArrayList<Object>();
        struct.add(Integer.valueOf(234));
        struct.add(bye);
        struct.add(another);
        struct.add(Integer.valueOf(-234));
        struct.add(Double.valueOf(1.0));
        struct.add(Double.valueOf(-2.5));
        // Protocols under test, whether each is binary, and any extra serde
        // properties each needs. Three parallel lists, indexed together by pp.
        ArrayList<String> protocols = new ArrayList<String>();
        ArrayList<Boolean> isBinaries = new ArrayList<Boolean>();
        ArrayList<HashMap<String, String>> additionalParams = new ArrayList<HashMap<String, String>>();
        protocols.add(org.apache.hadoop.hive.serde2.thrift.TBinarySortableProtocol.class.getName());
        isBinaries.add(true);
        additionalParams.add(makeHashMap("serialization.sort.order", "++++++"));
        protocols.add(org.apache.hadoop.hive.serde2.thrift.TBinarySortableProtocol.class.getName());
        isBinaries.add(true);
        additionalParams.add(makeHashMap("serialization.sort.order", "------"));
        protocols.add(org.apache.thrift.protocol.TBinaryProtocol.class.getName());
        isBinaries.add(true);
        additionalParams.add(null);
        protocols.add(org.apache.thrift.protocol.TJSONProtocol.class.getName());
        isBinaries.add(false);
        additionalParams.add(null);
        // TSimpleJSONProtocol does not support deserialization.
        // protocols.add(org.apache.thrift.protocol.TSimpleJSONProtocol.class.getName());
        // isBinaries.add(false);
        // additionalParams.add(null);
        protocols.add(org.apache.hadoop.hive.serde2.thrift.TCTLSeparatedProtocol.class.getName());
        isBinaries.add(false);
        additionalParams.add(null);
        System.out.println("input struct = " + struct);
        for (int pp = 0; pp < protocols.size(); pp++) {
            String protocol = protocols.get(pp);
            boolean isBinary = isBinaries.get(pp);
            System.out.println("Testing protocol: " + protocol);
            Properties schema = new Properties();
            schema.setProperty(serdeConstants.SERIALIZATION_FORMAT, protocol);
            schema.setProperty(org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.META_TABLE_NAME, "test");
            schema.setProperty(serdeConstants.SERIALIZATION_DDL, "struct test { i32 _hello, list<string> 2bye, map<string,i32> another, i32 nhello, double d, double nd}");
            // NOTE(review): getClass().toString() yields "class <fqcn>", not the
            // bare class name; the serde apparently does not consult this value,
            // otherwise Class.getName() would be the correct form — confirm.
            schema.setProperty(serdeConstants.SERIALIZATION_LIB, new DynamicSerDe().getClass().toString());
            HashMap<String, String> p = additionalParams.get(pp);
            if (p != null) {
                for (Entry<String, String> e : p.entrySet()) {
                    schema.setProperty(e.getKey(), e.getValue());
                }
            }
            DynamicSerDe serde = new DynamicSerDe();
            serde.initialize(new Configuration(), schema);
            // Try getObjectInspector
            ObjectInspector oi = serde.getObjectInspector();
            System.out.println("TypeName = " + oi.getTypeName());
            // Serialize, then show the bytes (text form too for text protocols).
            BytesWritable bytes = (BytesWritable) serde.serialize(struct, oi);
            System.out.println("bytes =" + hexString(bytes));
            if (!isBinary) {
                // getBytes()/getLength() replace the deprecated get()/getSize().
                System.out.println("bytes in text =" + new String(bytes.getBytes(), 0, bytes.getLength()));
            }
            // Deserialize and verify the round trip.
            Object o = serde.deserialize(bytes);
            System.out.println("o class = " + o.getClass());
            List<?> olist = (List<?>) o;
            System.out.println("o size = " + olist.size());
            System.out.println("o[0] class = " + olist.get(0).getClass());
            System.out.println("o[1] class = " + olist.get(1).getClass());
            System.out.println("o[2] class = " + olist.get(2).getClass());
            System.out.println("o = " + o);
            assertEquals(struct, o);
        }
    } catch (Throwable e) {
        // Log for test-output visibility, then rethrow so the test still fails.
        e.printStackTrace();
        throw e;
    }
}
Also used : Configuration(org.apache.hadoop.conf.Configuration) HashMap(java.util.HashMap) LinkedHashMap(java.util.LinkedHashMap) ArrayList(java.util.ArrayList) Properties(java.util.Properties) ArrayList(java.util.ArrayList) List(java.util.List) ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) BytesWritable(org.apache.hadoop.io.BytesWritable) TCTLSeparatedProtocol(org.apache.hadoop.hive.serde2.thrift.TCTLSeparatedProtocol)

Example 2 with TCTLSeparatedProtocol

Use of org.apache.hadoop.hive.serde2.thrift.TCTLSeparatedProtocol in the Apache Hive project.

From the class TestTCTLSeparatedProtocol, method testShouldThrowRunTimeExceptionIfUnableToInitializeTokenizer.

/**
 * Verifies that reading a struct fails with a runtime exception (caused by a
 * TTransportException) when the underlying transport cannot be read, so the
 * protocol's tokenizer can never be initialized.
 */
public void testShouldThrowRunTimeExceptionIfUnableToInitializeTokenizer() throws Exception {
    // Transport whose read() always fails; writes and open/close are no-ops.
    TTransport failingTransport = new TTransport() {

        @Override
        public boolean isOpen() {
            return false;
        }

        @Override
        public void open() throws TTransportException {
        }

        @Override
        public void close() {
        }

        @Override
        public int read(byte[] buf, int off, int len) throws TTransportException {
            // Simulate an unreadable transport.
            throw new TTransportException();
        }

        @Override
        public void write(byte[] buf, int off, int len) throws TTransportException {
        }
    };
    TCTLSeparatedProtocol protocol = new TCTLSeparatedProtocol(failingTransport);
    protocol.initialize(null, new Properties());
    try {
        protocol.readStructBegin();
        fail("Runtime Exception is expected if the intialization of tokenizer failed.");
    } catch (Exception e) {
        // The transport failure must be preserved as the cause.
        assertTrue(e.getCause() instanceof TTransportException);
    }
}
Also used : TTransportException(org.apache.thrift.transport.TTransportException) TTransport(org.apache.thrift.transport.TTransport) Properties(java.util.Properties) TTransportException(org.apache.thrift.transport.TTransportException) TCTLSeparatedProtocol(org.apache.hadoop.hive.serde2.thrift.TCTLSeparatedProtocol)

Example 3 with TCTLSeparatedProtocol

Use of org.apache.hadoop.hive.serde2.thrift.TCTLSeparatedProtocol in the Apache Hive project.

From the class TestTCTLSeparatedProtocol, method testReads.

/**
 * Writes a delimited row into a TMemoryBuffer — two strings separated by an
 * empty column, a two-entry map, and a missing trailing column — then reads
 * it back through TCTLSeparatedProtocol, verifying every field.
 */
public void testReads() throws Exception {
    TMemoryBuffer trans = new TMemoryBuffer(1024);
    String foo = "Hello";
    String bar = "World!";
    String key = "22";
    String value = "TheValue";
    String key2 = "24";
    String value2 = "TheValueAgain";
    // Control-character delimiters: ^A between columns, ^B between map
    // entries, ^C between a map key and its value.
    byte[] columnSeparator = { 1 };
    byte[] elementSeparator = { 2 };
    byte[] kvSeparator = { 3 };
    // Hoist the byte arrays instead of calling getBytes() twice per string.
    byte[] fooBytes = foo.getBytes();
    byte[] barBytes = bar.getBytes();
    byte[] keyBytes = key.getBytes();
    byte[] valueBytes = value.getBytes();
    byte[] key2Bytes = key2.getBytes();
    byte[] value2Bytes = value2.getBytes();
    trans.write(fooBytes, 0, fooBytes.length);
    // Two consecutive column separators produce an empty second column.
    trans.write(columnSeparator, 0, 1);
    trans.write(columnSeparator, 0, 1);
    trans.write(barBytes, 0, barBytes.length);
    trans.write(columnSeparator, 0, 1);
    trans.write(keyBytes, 0, keyBytes.length);
    trans.write(kvSeparator, 0, 1);
    trans.write(valueBytes, 0, valueBytes.length);
    trans.write(elementSeparator, 0, 1);
    trans.write(key2Bytes, 0, key2Bytes.length);
    trans.write(kvSeparator, 0, 1);
    trans.write(value2Bytes, 0, value2Bytes.length);
    trans.flush();
    // NOTE(review): the original comment claimed a 3-byte row buffer "to force
    // lots of re-buffering", but the code has always passed 1024. Keep 1024 so
    // behavior is unchanged; testNulls covers the tiny-buffer path.
    TCTLSeparatedProtocol prot = new TCTLSeparatedProtocol(trans, 1024);
    prot.initialize(new Configuration(), new Properties());
    prot.readStructBegin();
    prot.readFieldBegin();
    String hello = prot.readString();
    prot.readFieldEnd();
    assertEquals(foo, hello);
    // The empty column between the two separators reads as "".
    prot.readFieldBegin();
    assertEquals("", prot.readString());
    prot.readFieldEnd();
    prot.readFieldBegin();
    assertEquals(bar, prot.readString());
    prot.readFieldEnd();
    // The map column: two key/value pairs.
    prot.readFieldBegin();
    TMap mapHeader = prot.readMapBegin();
    assertEquals(2, mapHeader.size);
    assertEquals(22, prot.readI32());
    assertEquals(value, prot.readString());
    assertEquals(24, prot.readI32());
    assertEquals(value2, prot.readString());
    prot.readMapEnd();
    prot.readFieldEnd();
    // Reading past the last written column yields null.
    prot.readFieldBegin();
    hello = prot.readString();
    prot.readFieldEnd();
    assertNull(hello);
    prot.readStructEnd();
}
Also used : TMemoryBuffer(org.apache.thrift.transport.TMemoryBuffer) Configuration(org.apache.hadoop.conf.Configuration) Properties(java.util.Properties) TCTLSeparatedProtocol(org.apache.hadoop.hive.serde2.thrift.TCTLSeparatedProtocol) TMap(org.apache.thrift.protocol.TMap)

Example 4 with TCTLSeparatedProtocol

Use of org.apache.hadoop.hive.serde2.thrift.TCTLSeparatedProtocol in the Apache Hive project.

From the class TestTCTLSeparatedProtocol, method testNulls.

/**
 * Exercises null handling in TCTLSeparatedProtocol: null strings are written
 * as the \N escape and read back as null, and an i32 read from a null column
 * comes back as 0. Also uses small row buffers (10 on write, 3 on read) to
 * exercise re-buffering across buffer boundaries.
 */
public void testNulls() throws Exception {
    TMemoryBuffer trans = new TMemoryBuffer(1024);
    // Small (10-byte) row buffer on the write side.
    TCTLSeparatedProtocol prot = new TCTLSeparatedProtocol(trans, 10);
    prot.initialize(new Configuration(), new Properties());
    // Write: null, null, 100, null, then a 3-entry map mixing null keys and
    // values with the literal strings "key2" and "val3".
    prot.writeStructBegin(new TStruct());
    prot.writeFieldBegin(new TField());
    prot.writeString(null);
    prot.writeFieldEnd();
    prot.writeFieldBegin(new TField());
    prot.writeString(null);
    prot.writeFieldEnd();
    prot.writeFieldBegin(new TField());
    prot.writeI32(100);
    prot.writeFieldEnd();
    prot.writeFieldBegin(new TField());
    prot.writeString(null);
    prot.writeFieldEnd();
    prot.writeFieldBegin(new TField());
    // Map entries are written as alternating key/value strings.
    prot.writeMapBegin(new TMap());
    prot.writeString(null);
    prot.writeString(null);
    prot.writeString("key2");
    prot.writeString(null);
    prot.writeString(null);
    prot.writeString("val3");
    prot.writeMapEnd();
    prot.writeFieldEnd();
    prot.writeStructEnd();
    // Drain the serialized row and compare against the expected text: every
    // null becomes the \N escape, delimited by the default control characters.
    byte[] b = new byte[3 * 1024];
    int len = trans.read(b, 0, b.length);
    String written = new String(b, 0, len);
    String testRef = "\\N\\N100\\N\\N\\Nkey2\\N\\Nval3";
    assertTrue(testRef.equals(written));
    // Re-read the same bytes through a fresh protocol with a tiny (3-byte)
    // row buffer to force re-buffering while parsing.
    trans = new TMemoryBuffer(1023);
    trans.write(b, 0, len);
    prot = new TCTLSeparatedProtocol(trans, 3);
    prot.initialize(new Configuration(), new Properties());
    prot.readStructBegin();
    prot.readFieldBegin();
    String ret = prot.readString();
    prot.readFieldEnd();
    assertNull(ret);
    prot.readFieldBegin();
    ret = prot.readString();
    prot.readFieldEnd();
    assertNull(ret);
    prot.readFieldBegin();
    int ret1 = prot.readI32();
    prot.readFieldEnd();
    assertTrue(ret1 == 100);
    // Reading an i32 from the null fourth column; checked via ret1 == 0 below.
    prot.readFieldBegin();
    ret1 = prot.readI32();
    prot.readFieldEnd();
    prot.readFieldBegin();
    TMap map = prot.readMapBegin();
    assertTrue(map.size == 3);
    assertNull(prot.readString());
    assertNull(prot.readString());
    assertTrue(prot.readString().equals("key2"));
    assertNull(prot.readString());
    assertNull(prot.readString());
    assertTrue(prot.readString().equals("val3"));
    prot.readMapEnd();
    prot.readFieldEnd();
    // ret1 was last assigned from the null i32 column above; a null numeric
    // column must deserialize as 0.
    assertTrue(ret1 == 0);
}
Also used : TMemoryBuffer(org.apache.thrift.transport.TMemoryBuffer) Configuration(org.apache.hadoop.conf.Configuration) TField(org.apache.thrift.protocol.TField) Properties(java.util.Properties) TStruct(org.apache.thrift.protocol.TStruct) TCTLSeparatedProtocol(org.apache.hadoop.hive.serde2.thrift.TCTLSeparatedProtocol) TMap(org.apache.thrift.protocol.TMap)

Example 5 with TCTLSeparatedProtocol

Use of org.apache.hadoop.hive.serde2.thrift.TCTLSeparatedProtocol in the Apache Hive project.

From the class TestDynamicSerDe, method testConfigurableTCTLSeparated.

/**
 * Verifies that TCTLSeparatedProtocol honors user-configured delimiters
 * (field, collection, line and map-key) supplied through the serde schema,
 * both when serializing to text and when deserializing back.
 */
public void testConfigurableTCTLSeparated() throws Throwable {
    try {
        // Build the input row; LinkedHashMap keeps "firstKey" before
        // "secondKey" so the serialized text is deterministic.
        ArrayList<String> bye = new ArrayList<String>();
        bye.add("firstString");
        bye.add("secondString");
        LinkedHashMap<String, Integer> another = new LinkedHashMap<String, Integer>();
        another.put("firstKey", 1);
        another.put("secondKey", 2);
        ArrayList<Object> struct = new ArrayList<Object>();
        struct.add(Integer.valueOf(234));
        struct.add(bye);
        struct.add(another);
        Properties schema = new Properties();
        schema.setProperty(serdeConstants.SERIALIZATION_FORMAT, org.apache.hadoop.hive.serde2.thrift.TCTLSeparatedProtocol.class.getName());
        schema.setProperty(org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.META_TABLE_NAME, "test");
        schema.setProperty(serdeConstants.SERIALIZATION_DDL, "struct test { i32 hello, list<string> bye, map<string,i32> another}");
        schema.setProperty(serdeConstants.SERIALIZATION_LIB, new DynamicSerDe().getClass().toString());
        // Delimiters given as decimal character codes: tab (9) between fields,
        // ^A (1) between collection items, ^B (2) between rows, and ^D (4)
        // between a map key and its value.
        schema.setProperty(serdeConstants.FIELD_DELIM, "9");
        schema.setProperty(serdeConstants.COLLECTION_DELIM, "1");
        schema.setProperty(serdeConstants.LINE_DELIM, "2");
        schema.setProperty(serdeConstants.MAPKEY_DELIM, "4");
        DynamicSerDe serde = new DynamicSerDe();
        serde.initialize(new Configuration(), schema);
        // The configured field delimiter must become the primary separator.
        TCTLSeparatedProtocol prot = (TCTLSeparatedProtocol) serde.oprot_;
        assertTrue(prot.getPrimarySeparator().equals("\u0009"));
        ObjectInspector oi = serde.getObjectInspector();
        // Serialize and compare the text form against the expected layout.
        BytesWritable bytes = (BytesWritable) serde.serialize(struct, oi);
        hexString(bytes);
        String compare = "234" + "\u0009" + "firstString" + "\u0001" + "secondString" + "\u0009" + "firstKey" + "\u0004" + "1" + "\u0001" + "secondKey" + "\u0004" + "2";
        System.out.println("bytes in text =" + new String(bytes.get(), 0, bytes.getSize()) + ">");
        System.out.println("compare to    =" + compare + ">");
        assertTrue(compare.equals(new String(bytes.get(), 0, bytes.getSize())));
        // Deserialize and verify the round trip.
        Object o = serde.deserialize(bytes);
        System.out.println("o class = " + o.getClass());
        List<?> olist = (List<?>) o;
        System.out.println("o size = " + olist.size());
        System.out.println("o[0] class = " + olist.get(0).getClass());
        System.out.println("o[1] class = " + olist.get(1).getClass());
        System.out.println("o[2] class = " + olist.get(2).getClass());
        System.out.println("o = " + o);
        // Expected value first, matching JUnit's (expected, actual) convention
        // and the sibling testDynamicSerDe.
        assertEquals(struct, o);
    } catch (Throwable e) {
        // Log for test-output visibility, then rethrow so the test still fails.
        e.printStackTrace();
        throw e;
    }
}
Also used : ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) Configuration(org.apache.hadoop.conf.Configuration) ArrayList(java.util.ArrayList) BytesWritable(org.apache.hadoop.io.BytesWritable) Properties(java.util.Properties) LinkedHashMap(java.util.LinkedHashMap) ArrayList(java.util.ArrayList) List(java.util.List) TCTLSeparatedProtocol(org.apache.hadoop.hive.serde2.thrift.TCTLSeparatedProtocol)

Aggregations

Properties (java.util.Properties)9 TCTLSeparatedProtocol (org.apache.hadoop.hive.serde2.thrift.TCTLSeparatedProtocol)9 Configuration (org.apache.hadoop.conf.Configuration)8 TMemoryBuffer (org.apache.thrift.transport.TMemoryBuffer)5 ArrayList (java.util.ArrayList)3 LinkedHashMap (java.util.LinkedHashMap)3 List (java.util.List)3 ObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector)3 BytesWritable (org.apache.hadoop.io.BytesWritable)3 TField (org.apache.thrift.protocol.TField)3 TMap (org.apache.thrift.protocol.TMap)3 TStruct (org.apache.thrift.protocol.TStruct)3 TList (org.apache.thrift.protocol.TList)2 HashMap (java.util.HashMap)1 TTransport (org.apache.thrift.transport.TTransport)1 TTransportException (org.apache.thrift.transport.TTransportException)1