Search in sources :

Example 6 with TCTLSeparatedProtocol

Use of org.apache.hadoop.hive.serde2.thrift.TCTLSeparatedProtocol in the Apache Hive project.

In class TestDynamicSerDe, method testDynamicSerDe:

/**
 * Round-trips one fixed struct through {@code DynamicSerDe} once per configured protocol.
 *
 * <p>For each protocol the struct is serialized, the resulting bytes are printed (and also
 * printed as text when the protocol is flagged as non-binary), the bytes are deserialized
 * again, and the result must equal the original struct.
 *
 * @throws Throwable any failure raised while running the round trip; its stack trace is
 *     printed before it is rethrown
 */
public void testDynamicSerDe() throws Throwable {
    try {
        // Build the row to serialize; the field order matches the DDL set on the schema below.
        List<String> bye = new ArrayList<String>();
        bye.add("firstString");
        bye.add("secondString");
        HashMap<String, Integer> another = new HashMap<String, Integer>();
        another.put("firstKey", 1);
        another.put("secondKey", 2);
        List<Object> struct = new ArrayList<Object>();
        struct.add(Integer.valueOf(234));
        struct.add(bye);
        struct.add(another);
        struct.add(Integer.valueOf(-234));
        struct.add(Double.valueOf(1.0));
        struct.add(Double.valueOf(-2.5));

        // Parallel lists: index i of each list describes the i-th protocol under test
        // (class name, whether its output is binary, extra schema properties or null).
        List<String> protocols = new ArrayList<String>();
        List<Boolean> isBinaries = new ArrayList<Boolean>();
        List<HashMap<String, String>> additionalParams = new ArrayList<HashMap<String, String>>();

        protocols.add(org.apache.hadoop.hive.serde2.thrift.TBinarySortableProtocol.class.getName());
        isBinaries.add(true);
        additionalParams.add(makeHashMap("serialization.sort.order", "++++++"));

        protocols.add(org.apache.hadoop.hive.serde2.thrift.TBinarySortableProtocol.class.getName());
        isBinaries.add(true);
        additionalParams.add(makeHashMap("serialization.sort.order", "------"));

        protocols.add(org.apache.thrift.protocol.TBinaryProtocol.class.getName());
        isBinaries.add(true);
        additionalParams.add(null);

        protocols.add(org.apache.thrift.protocol.TJSONProtocol.class.getName());
        isBinaries.add(false);
        additionalParams.add(null);

        // TSimpleJSONProtocol does not support deserialization.
        // protocols.add(org.apache.thrift.protocol.TSimpleJSONProtocol.class.getName());
        // isBinaries.add(false);
        // additionalParams.add(null);

        // TCTLSeparatedProtocol: text output, exercised without extra schema properties.
        protocols.add(org.apache.hadoop.hive.serde2.thrift.TCTLSeparatedProtocol.class.getName());
        isBinaries.add(false);
        additionalParams.add(null);

        System.out.println("input struct = " + struct);
        for (int pp = 0; pp < protocols.size(); pp++) {
            String protocol = protocols.get(pp);
            boolean isBinary = isBinaries.get(pp);
            System.out.println("Testing protocol: " + protocol);

            Properties schema = new Properties();
            schema.setProperty(serdeConstants.SERIALIZATION_FORMAT, protocol);
            schema.setProperty(org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.META_TABLE_NAME, "test");
            schema.setProperty(serdeConstants.SERIALIZATION_DDL, "struct test { i32 _hello, list<string> 2bye, map<string,i32> another, i32 nhello, double d, double nd}");
            // Class.toString() would yield "class <name>"; the property should carry the bare
            // class name, and no throwaway instance is needed to obtain it.
            schema.setProperty(serdeConstants.SERIALIZATION_LIB, DynamicSerDe.class.getName());
            HashMap<String, String> extra = additionalParams.get(pp);
            if (extra != null) {
                for (Entry<String, String> e : extra.entrySet()) {
                    schema.setProperty(e.getKey(), e.getValue());
                }
            }

            DynamicSerDe serde = new DynamicSerDe();
            serde.initialize(new Configuration(), schema);

            // Try getObjectInspector
            ObjectInspector oi = serde.getObjectInspector();
            System.out.println("TypeName = " + oi.getTypeName());

            // Try to serialize
            BytesWritable bytes = (BytesWritable) serde.serialize(struct, oi);
            System.out.println("bytes =" + hexString(bytes));
            if (!isBinary) {
                System.out.println("bytes in text =" + new String(bytes.get(), 0, bytes.getSize()));
            }

            // Try to deserialize
            Object o = serde.deserialize(bytes);
            System.out.println("o class = " + o.getClass());
            List<?> olist = (List<?>) o;
            System.out.println("o size = " + olist.size());
            System.out.println("o[0] class = " + olist.get(0).getClass());
            System.out.println("o[1] class = " + olist.get(1).getClass());
            System.out.println("o[2] class = " + olist.get(2).getClass());
            System.out.println("o = " + o);
            assertEquals(struct, o);
        }
    } catch (Throwable e) {
        // Print the trace next to the diagnostic output above, then let the test fail.
        e.printStackTrace();
        throw e;
    }
}
Also used : Configuration(org.apache.hadoop.conf.Configuration) HashMap(java.util.HashMap) LinkedHashMap(java.util.LinkedHashMap) ArrayList(java.util.ArrayList) Properties(java.util.Properties) ArrayList(java.util.ArrayList) List(java.util.List) ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) BytesWritable(org.apache.hadoop.io.BytesWritable) TCTLSeparatedProtocol(org.apache.hadoop.hive.serde2.thrift.TCTLSeparatedProtocol)

Example 7 with TCTLSeparatedProtocol

Use of org.apache.hadoop.hive.serde2.thrift.TCTLSeparatedProtocol in the Apache Hive project.

In class TestTCTLSeparatedProtocol, method test1ApacheLogFormat:

/**
 * Reads a sample Apache access-log line field by field with TCTLSeparatedProtocol.
 * A more regex-like protocol would be the better general tool for this format,
 * but TCTLSeparatedProtocol is able to handle this particular case.
 */
public void test1ApacheLogFormat() throws Exception {
    final String logLine = "127.0.0.1 - frank [10/Oct/2000:13:55:36 -0700] \"GET /apache_pb.gif HTTP/1.0\" 200 2326";
    final byte[] logBytes = logLine.getBytes();

    // Load the raw line into an in-memory transport for the protocol to read from.
    TMemoryBuffer buffer = new TMemoryBuffer(4096);
    buffer.write(logBytes, 0, logBytes.length);
    buffer.flush();
    TCTLSeparatedProtocol prot = new TCTLSeparatedProtocol(buffer, 4096);

    Properties props = new Properties();
    // Hacky quoting: the pattern matches any two of these characters, so
    // "[ hello this is something to split [" would also be treated as quoted.
    props.setProperty(serdeConstants.QUOTE_CHAR, "(\"|\\[|\\])");
    props.setProperty(serdeConstants.FIELD_DELIM, " ");
    props.setProperty(serdeConstants.SERIALIZATION_NULL_FORMAT, "-");
    prot.initialize(new Configuration(), props);

    prot.readStructBegin();
    // ip address
    assertEquals("127.0.0.1", readApacheLogStringField(prot));
    // identd: "-" is configured as the null format above
    assertNull(readApacheLogStringField(prot));
    // user
    assertEquals("frank", readApacheLogStringField(prot));
    // finish time, quoted by the square brackets
    assertEquals("10/Oct/2000:13:55:36 -0700", readApacheLogStringField(prot));
    // request line, quoted by the double quotes
    assertEquals("GET /apache_pb.gif HTTP/1.0", readApacheLogStringField(prot));
    // return code
    assertEquals(200, readApacheLogI32Field(prot));
    // return size
    assertEquals(2326, readApacheLogI32Field(prot));
    prot.readStructEnd();
}

/** Reads one complete field (begin, string value, end) and returns the string value. */
private static String readApacheLogStringField(TCTLSeparatedProtocol prot) throws Exception {
    prot.readFieldBegin();
    final String value = prot.readString();
    prot.readFieldEnd();
    return value;
}

/** Reads one complete field (begin, i32 value, end) and returns the int value. */
private static int readApacheLogI32Field(TCTLSeparatedProtocol prot) throws Exception {
    prot.readFieldBegin();
    final int value = prot.readI32();
    prot.readFieldEnd();
    return value;
}
Also used : TMemoryBuffer(org.apache.thrift.transport.TMemoryBuffer) Configuration(org.apache.hadoop.conf.Configuration) Properties(java.util.Properties) TCTLSeparatedProtocol(org.apache.hadoop.hive.serde2.thrift.TCTLSeparatedProtocol)

Example 8 with TCTLSeparatedProtocol

Use of org.apache.hadoop.hive.serde2.thrift.TCTLSeparatedProtocol in the Apache Hive project.

In class TestTCTLSeparatedProtocol, method testQuotedWrites:

/**
 * Writes a struct containing a quoted string (which itself contains the field delimiter)
 * and a two-element list, then reads the bytes back through a second protocol instance.
 *
 * <p>The first field must come back with its quote characters removed, the list must come
 * back with both elements, and reads past the written fields must return null.
 *
 * @throws Exception if any protocol or transport call fails
 */
public void testQuotedWrites() throws Exception {
    Properties schema = new Properties();
    schema.setProperty(serdeConstants.QUOTE_CHAR, "\"");
    schema.setProperty(serdeConstants.FIELD_DELIM, ",");

    // ---- write phase ----
    TMemoryBuffer writeTrans = new TMemoryBuffer(4096);
    TCTLSeparatedProtocol writer = new TCTLSeparatedProtocol(writeTrans, 4096);
    writer.initialize(new Configuration(), schema);

    final String quoted = "\"hello, world!\"";
    writer.writeStructBegin(new TStruct());
    writer.writeFieldBegin(new TField());
    writer.writeString(quoted);
    writer.writeFieldEnd();
    writer.writeFieldBegin(new TField());
    writer.writeListBegin(new TList());
    writer.writeString("elem1");
    writer.writeString("elem2");
    writer.writeListEnd();
    writer.writeFieldEnd();
    writer.writeStructEnd();
    writer.writeString("\n");
    writeTrans.flush();

    // Copy what was written into a fresh transport for the read phase.
    byte[] b = new byte[4096];
    int len = writeTrans.read(b, 0, b.length);
    TMemoryBuffer readTrans = new TMemoryBuffer(4096);
    readTrans.write(b, 0, len);

    // ---- read phase ----
    TCTLSeparatedProtocol reader = new TCTLSeparatedProtocol(readTrans, 1024);
    reader.initialize(new Configuration(), schema);
    reader.readStructBegin();

    reader.readFieldBegin();
    final String firstRead = reader.readString();
    reader.readFieldEnd();
    // The quote characters are expected to be dropped on read.
    assertEquals(quoted.replace("\"", ""), firstRead);

    // the 2 element list
    reader.readFieldBegin();
    TList l = reader.readListBegin();
    // assertEquals reports the actual value on failure, unlike assertTrue(a == b).
    assertEquals(2, l.size);
    assertEquals("elem1", reader.readString());
    assertEquals("elem2", reader.readString());
    reader.readListEnd();
    reader.readFieldEnd();

    // should return nulls at end
    reader.readFieldBegin();
    assertNull(reader.readString());
    reader.readFieldEnd();

    // should return nulls at end
    reader.readFieldBegin();
    assertNull(reader.readString());
    reader.readFieldEnd();

    reader.readStructEnd();
}
Also used : TList(org.apache.thrift.protocol.TList) TMemoryBuffer(org.apache.thrift.transport.TMemoryBuffer) Configuration(org.apache.hadoop.conf.Configuration) TField(org.apache.thrift.protocol.TField) Properties(java.util.Properties) TStruct(org.apache.thrift.protocol.TStruct) TCTLSeparatedProtocol(org.apache.hadoop.hive.serde2.thrift.TCTLSeparatedProtocol)

Example 9 with TCTLSeparatedProtocol

Use of org.apache.hadoop.hive.serde2.thrift.TCTLSeparatedProtocol in the Apache Hive project.

In class TestDynamicSerDe, method testSkip:

/**
 * Checks that a field marked {@code skip} in the DDL is returned as null on deserialization.
 *
 * <p>A three-field struct is serialized with TCTLSeparatedProtocol using numeric delimiter
 * settings, the serialized text is compared against the expected delimited string, and the
 * serde is then re-initialized with the second field marked {@code skip} before the same
 * bytes are deserialized.
 *
 * @throws Throwable any failure raised while running the test; its stack trace is printed
 *     before it is rethrown
 */
public void testSkip() throws Throwable {
    try {
        // Try to construct an object
        List<String> bye = new ArrayList<String>();
        bye.add("firstString");
        bye.add("secondString");
        LinkedHashMap<String, Integer> another = new LinkedHashMap<String, Integer>();
        another.put("firstKey", 1);
        another.put("secondKey", 2);
        List<Object> struct = new ArrayList<Object>();
        struct.add(Integer.valueOf(234));
        struct.add(bye);
        struct.add(another);

        Properties schema = new Properties();
        schema.setProperty(serdeConstants.SERIALIZATION_FORMAT, org.apache.hadoop.hive.serde2.thrift.TCTLSeparatedProtocol.class.getName());
        schema.setProperty(org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.META_TABLE_NAME, "test");
        schema.setProperty(serdeConstants.SERIALIZATION_DDL, "struct test { i32 hello, list<string> bye, map<string,i32> another}");
        // Class.toString() would yield "class <name>"; the property should carry the bare
        // class name, and no throwaway instance is needed to obtain it.
        schema.setProperty(serdeConstants.SERIALIZATION_LIB, DynamicSerDe.class.getName());
        // Delimiters are given as numeric character codes; the separator assertion below
        // shows "9" being interpreted as the tab character (code 9).
        schema.setProperty(serdeConstants.FIELD_DELIM, "9");
        schema.setProperty(serdeConstants.COLLECTION_DELIM, "1");
        schema.setProperty(serdeConstants.LINE_DELIM, "2");
        schema.setProperty(serdeConstants.MAPKEY_DELIM, "4");

        DynamicSerDe serde = new DynamicSerDe();
        serde.initialize(new Configuration(), schema);
        TCTLSeparatedProtocol prot = (TCTLSeparatedProtocol) serde.oprot_;
        assertEquals("\t", prot.getPrimarySeparator());
        ObjectInspector oi = serde.getObjectInspector();

        // Try to serialize
        BytesWritable bytes = (BytesWritable) serde.serialize(struct, oi);
        hexString(bytes);
        // Expected text, with every delimiter spelled out as an escape so that the control
        // characters stay visible: fields are joined by code 9 (tab), collection items by
        // code 1, and map keys are separated from their values by code 4.
        String compare = "234" + "\t" + "firstString" + "\u0001" + "secondString" + "\t" + "firstKey" + "\u0004" + "1" + "\u0001" + "secondKey" + "\u0004" + "2";
        String serialized = new String(bytes.get(), 0, bytes.getSize());
        System.out.println("bytes in text =" + serialized + ">");
        System.out.println("compare to    =" + compare + ">");
        assertEquals(compare, serialized);

        // Re-initialize the same serde with the list field marked as skipped.
        schema.setProperty(serdeConstants.SERIALIZATION_DDL, "struct test { i32 hello, skip list<string> bye, map<string,i32> another}");
        serde.initialize(new Configuration(), schema);

        // Try to deserialize
        Object o = serde.deserialize(bytes);
        System.out.println("o class = " + o.getClass());
        List<?> olist = (List<?>) o;
        System.out.println("o size = " + olist.size());
        System.out.println("o = " + o);
        assertEquals(null, olist.get(1));

        // set the skipped field to null in the input so that it matches the expected output
        struct.set(1, null);
        assertEquals(struct, o);
    } catch (Throwable e) {
        // Print the trace next to the diagnostic output above, then let the test fail.
        e.printStackTrace();
        throw e;
    }
}
Also used : ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) Configuration(org.apache.hadoop.conf.Configuration) ArrayList(java.util.ArrayList) BytesWritable(org.apache.hadoop.io.BytesWritable) Properties(java.util.Properties) LinkedHashMap(java.util.LinkedHashMap) ArrayList(java.util.ArrayList) List(java.util.List) TCTLSeparatedProtocol(org.apache.hadoop.hive.serde2.thrift.TCTLSeparatedProtocol)

Aggregations

Properties (java.util.Properties)9 TCTLSeparatedProtocol (org.apache.hadoop.hive.serde2.thrift.TCTLSeparatedProtocol)9 Configuration (org.apache.hadoop.conf.Configuration)8 TMemoryBuffer (org.apache.thrift.transport.TMemoryBuffer)5 ArrayList (java.util.ArrayList)3 LinkedHashMap (java.util.LinkedHashMap)3 List (java.util.List)3 ObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector)3 BytesWritable (org.apache.hadoop.io.BytesWritable)3 TField (org.apache.thrift.protocol.TField)3 TMap (org.apache.thrift.protocol.TMap)3 TStruct (org.apache.thrift.protocol.TStruct)3 TList (org.apache.thrift.protocol.TList)2 HashMap (java.util.HashMap)1 TTransport (org.apache.thrift.transport.TTransport)1 TTransportException (org.apache.thrift.transport.TTransportException)1