Use of org.apache.hadoop.hive.serde2.thrift.TCTLSeparatedProtocol in the Apache Hive project: method testDynamicSerDe of class TestDynamicSerDe.
/**
 * Round-trips a six-field struct (i32, list&lt;string&gt;, map&lt;string,i32&gt;, i32,
 * double, double) through DynamicSerDe over several Thrift protocols and
 * asserts that deserialization reproduces the original object.
 */
public void testDynamicSerDe() throws Throwable {
try {
// Try to construct an object
ArrayList<String> bye = new ArrayList<String>();
bye.add("firstString");
bye.add("secondString");
HashMap<String, Integer> another = new HashMap<String, Integer>();
another.put("firstKey", 1);
another.put("secondKey", 2);
ArrayList<Object> struct = new ArrayList<Object>();
struct.add(Integer.valueOf(234));
struct.add(bye);
struct.add(another);
struct.add(Integer.valueOf(-234));
struct.add(Double.valueOf(1.0));
struct.add(Double.valueOf(-2.5));
// All protocols
// Three parallel lists, indexed together by pp in the loop below:
// protocol class name, whether its serialized form is binary (controls
// whether the bytes are also printed as text), and optional extra schema
// properties (null when the protocol needs none).
ArrayList<String> protocols = new ArrayList<String>();
ArrayList<Boolean> isBinaries = new ArrayList<Boolean>();
ArrayList<HashMap<String, String>> additionalParams = new ArrayList<HashMap<String, String>>();
// TBinarySortableProtocol twice: once with all fields ascending, once with
// all fields descending (one +/- character per struct field, presumably --
// TODO confirm against TBinarySortableProtocol's handling of the property).
protocols.add(org.apache.hadoop.hive.serde2.thrift.TBinarySortableProtocol.class.getName());
isBinaries.add(true);
additionalParams.add(makeHashMap("serialization.sort.order", "++++++"));
protocols.add(org.apache.hadoop.hive.serde2.thrift.TBinarySortableProtocol.class.getName());
isBinaries.add(true);
additionalParams.add(makeHashMap("serialization.sort.order", "------"));
protocols.add(org.apache.thrift.protocol.TBinaryProtocol.class.getName());
isBinaries.add(true);
additionalParams.add(null);
protocols.add(org.apache.thrift.protocol.TJSONProtocol.class.getName());
isBinaries.add(false);
additionalParams.add(null);
// TSimpleJSONProtocol does not support deserialization.
// protocols.add(org.apache.thrift.protocol.TSimpleJSONProtocol.class.getName());
// isBinaries.add(false);
// additionalParams.add(null);
// TCTLSeparatedProtocol is not done yet.
protocols.add(org.apache.hadoop.hive.serde2.thrift.TCTLSeparatedProtocol.class.getName());
isBinaries.add(false);
additionalParams.add(null);
System.out.println("input struct = " + struct);
// Serialize and deserialize the same struct once per protocol.
for (int pp = 0; pp < protocols.size(); pp++) {
String protocol = protocols.get(pp);
boolean isBinary = isBinaries.get(pp);
System.out.println("Testing protocol: " + protocol);
Properties schema = new Properties();
schema.setProperty(serdeConstants.SERIALIZATION_FORMAT, protocol);
schema.setProperty(org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.META_TABLE_NAME, "test");
// NOTE(review): field names such as "_hello" and "2bye" look deliberately
// unusual identifiers -- confirm the DDL parser accepts a leading digit.
schema.setProperty(serdeConstants.SERIALIZATION_DDL, "struct test { i32 _hello, list<string> 2bye, map<string,i32> another, i32 nhello, double d, double nd}");
// NOTE(review): Class.toString() yields "class x.y.Z", not the class name;
// getName() would be the usual value here -- confirm whether the
// SERIALIZATION_LIB property is actually consumed by this test path.
schema.setProperty(serdeConstants.SERIALIZATION_LIB, new DynamicSerDe().getClass().toString());
// Apply any protocol-specific extra properties (e.g. the sort order).
HashMap<String, String> p = additionalParams.get(pp);
if (p != null) {
for (Entry<String, String> e : p.entrySet()) {
schema.setProperty(e.getKey(), e.getValue());
}
}
DynamicSerDe serde = new DynamicSerDe();
serde.initialize(new Configuration(), schema);
// Try getObjectInspector
ObjectInspector oi = serde.getObjectInspector();
System.out.println("TypeName = " + oi.getTypeName());
// Try to serialize
BytesWritable bytes = (BytesWritable) serde.serialize(struct, oi);
System.out.println("bytes =" + hexString(bytes));
// Text protocols are additionally printed as readable text.
if (!isBinary) {
System.out.println("bytes in text =" + new String(bytes.get(), 0, bytes.getSize()));
}
// Try to deserialize
Object o = serde.deserialize(bytes);
System.out.println("o class = " + o.getClass());
List<?> olist = (List<?>) o;
System.out.println("o size = " + olist.size());
System.out.println("o[0] class = " + olist.get(0).getClass());
System.out.println("o[1] class = " + olist.get(1).getClass());
System.out.println("o[2] class = " + olist.get(2).getClass());
System.out.println("o = " + o);
// The round-trip must reproduce the input struct exactly.
assertEquals(struct, o);
}
} catch (Throwable e) {
e.printStackTrace();
throw e;
}
}
Use of org.apache.hadoop.hive.serde2.thrift.TCTLSeparatedProtocol in the Apache Hive project: method testShouldThrowRunTimeExceptionIfUnableToInitializeTokenizer of class TestTCTLSeparatedProtocol.
/**
 * Verifies that readStructBegin() surfaces a tokenizer-initialization failure
 * as an exception whose cause is the underlying TTransportException, rather
 * than swallowing it.
 */
public void testShouldThrowRunTimeExceptionIfUnableToInitializeTokenizer() throws Exception {
  // Transport stub whose read() always throws, so the protocol cannot fill
  // its buffer when the tokenizer is first set up.
  TCTLSeparatedProtocol separatedProtocol = new TCTLSeparatedProtocol(new TTransport() {

    @Override
    public void close() {
    }

    @Override
    public boolean isOpen() {
      return false;
    }

    @Override
    public void open() throws TTransportException {
    }

    @Override
    public int read(byte[] buf, int off, int len) throws TTransportException {
      throw new TTransportException();
    }

    @Override
    public void write(byte[] buf, int off, int len) throws TTransportException {
    }
  });
  separatedProtocol.initialize(null, new Properties());
  try {
    separatedProtocol.readStructBegin();
    // Fixed typo in the failure message ("intialization").
    fail("Runtime Exception is expected if the initialization of tokenizer failed.");
  } catch (Exception e) {
    // The transport failure must be preserved as the cause chain.
    assertTrue(e.getCause() instanceof TTransportException);
  }
}
Use of org.apache.hadoop.hive.serde2.thrift.TCTLSeparatedProtocol in the Apache Hive project: method testReads of class TestTCTLSeparatedProtocol.
/**
 * Reads back a hand-assembled CTL-separated row and checks every column:
 * a string, an empty string (two adjacent column separators), another
 * string, a two-entry map, and a missing trailing column (read as null).
 */
public void testReads() throws Exception {
  TMemoryBuffer trans = new TMemoryBuffer(1024);
  String foo = "Hello";
  String bar = "World!";
  String key = "22";
  String value = "TheValue";
  String key2 = "24";
  String value2 = "TheValueAgain";
  // Control-character separators: ^A between columns, ^B between map
  // entries, ^C between a key and its value.
  byte[] columnSeparator = { 1 };
  byte[] elementSeparator = { 2 };
  byte[] kvSeparator = { 3 };
  trans.write(foo.getBytes(), 0, foo.getBytes().length);
  // Two consecutive column separators produce an empty second column.
  trans.write(columnSeparator, 0, 1);
  trans.write(columnSeparator, 0, 1);
  trans.write(bar.getBytes(), 0, bar.getBytes().length);
  trans.write(columnSeparator, 0, 1);
  trans.write(key.getBytes(), 0, key.getBytes().length);
  trans.write(kvSeparator, 0, 1);
  trans.write(value.getBytes(), 0, value.getBytes().length);
  trans.write(elementSeparator, 0, 1);
  trans.write(key2.getBytes(), 0, key2.getBytes().length);
  trans.write(kvSeparator, 0, 1);
  trans.write(value2.getBytes(), 0, value2.getBytes().length);
  trans.flush();
  // Buffer size 1024 holds the whole row in one read; the earlier comment
  // claiming a 3-byte buffer was stale (testNulls covers re-buffering).
  TCTLSeparatedProtocol prot = new TCTLSeparatedProtocol(trans, 1024);
  prot.initialize(new Configuration(), new Properties());
  prot.readStructBegin();
  prot.readFieldBegin();
  String hello = prot.readString();
  prot.readFieldEnd();
  assertEquals(foo, hello);
  // The empty column created by the doubled separator.
  prot.readFieldBegin();
  assertEquals("", prot.readString());
  prot.readFieldEnd();
  prot.readFieldBegin();
  assertEquals(bar, prot.readString());
  prot.readFieldEnd();
  // The map column: {22 -> TheValue, 24 -> TheValueAgain}.
  prot.readFieldBegin();
  TMap mapHeader = prot.readMapBegin();
  assertEquals(2, mapHeader.size);
  assertEquals(22, prot.readI32());
  assertEquals(value, prot.readString());
  assertEquals(24, prot.readI32());
  assertEquals(value2, prot.readString());
  prot.readMapEnd();
  prot.readFieldEnd();
  // Reading past the last written column yields null.
  prot.readFieldBegin();
  hello = prot.readString();
  prot.readFieldEnd();
  assertNull(hello);
  prot.readStructEnd();
}
Use of org.apache.hadoop.hive.serde2.thrift.TCTLSeparatedProtocol in the Apache Hive project: method testNulls of class TestTCTLSeparatedProtocol.
/**
 * Writes a row containing null strings, an i32, and a map with null keys
 * and values; verifies the serialized text uses the \N null marker; then
 * reads the row back through a tiny 3-byte buffer (forcing re-buffering)
 * and checks every column.
 */
public void testNulls() throws Exception {
  TMemoryBuffer trans = new TMemoryBuffer(1024);
  TCTLSeparatedProtocol prot = new TCTLSeparatedProtocol(trans, 10);
  prot.initialize(new Configuration(), new Properties());
  prot.writeStructBegin(new TStruct());
  // Columns 1 and 2: null strings.
  prot.writeFieldBegin(new TField());
  prot.writeString(null);
  prot.writeFieldEnd();
  prot.writeFieldBegin(new TField());
  prot.writeString(null);
  prot.writeFieldEnd();
  // Column 3: an integer.
  prot.writeFieldBegin(new TField());
  prot.writeI32(100);
  prot.writeFieldEnd();
  // Column 4: another null string.
  prot.writeFieldBegin(new TField());
  prot.writeString(null);
  prot.writeFieldEnd();
  // Column 5: a three-entry map with null keys and values mixed in.
  prot.writeFieldBegin(new TField());
  prot.writeMapBegin(new TMap());
  prot.writeString(null);
  prot.writeString(null);
  prot.writeString("key2");
  prot.writeString(null);
  prot.writeString(null);
  prot.writeString("val3");
  prot.writeMapEnd();
  prot.writeFieldEnd();
  prot.writeStructEnd();
  byte[] b = new byte[3 * 1024];
  int len = trans.read(b, 0, b.length);
  String written = new String(b, 0, len);
  // Nulls serialize as the \N marker (the control-char separators sit
  // between the visible tokens).
  String testRef = "\\N\\N100\\N\\N\\Nkey2\\N\\Nval3";
  assertEquals(testRef, written);
  // Read it back with a 3-byte row buffer to force lots of re-buffering.
  trans = new TMemoryBuffer(1023);
  trans.write(b, 0, len);
  prot = new TCTLSeparatedProtocol(trans, 3);
  prot.initialize(new Configuration(), new Properties());
  prot.readStructBegin();
  prot.readFieldBegin();
  String ret = prot.readString();
  prot.readFieldEnd();
  assertNull(ret);
  prot.readFieldBegin();
  ret = prot.readString();
  prot.readFieldEnd();
  assertNull(ret);
  prot.readFieldBegin();
  int ret1 = prot.readI32();
  prot.readFieldEnd();
  assertEquals(100, ret1);
  // Column 4 was a null string; readI32 on it is asserted to be 0 after
  // the map below is consumed (assertion kept at the end as originally).
  prot.readFieldBegin();
  ret1 = prot.readI32();
  prot.readFieldEnd();
  prot.readFieldBegin();
  TMap map = prot.readMapBegin();
  assertEquals(3, map.size);
  assertNull(prot.readString());
  assertNull(prot.readString());
  assertEquals("key2", prot.readString());
  assertNull(prot.readString());
  assertNull(prot.readString());
  assertEquals("val3", prot.readString());
  prot.readMapEnd();
  prot.readFieldEnd();
  assertEquals(0, ret1);
}
Use of org.apache.hadoop.hive.serde2.thrift.TCTLSeparatedProtocol in the Apache Hive project: method testConfigurableTCTLSeparated of class TestDynamicSerDe.
/**
 * Configures TCTLSeparatedProtocol through table properties (tab as the
 * field delimiter plus custom collection/map-key delimiters), serializes a
 * three-field struct, checks the exact delimited text, and round-trips it
 * back through deserialization.
 */
public void testConfigurableTCTLSeparated() throws Throwable {
  try {
    // Try to construct an object
    ArrayList<String> bye = new ArrayList<String>();
    bye.add("firstString");
    bye.add("secondString");
    // LinkedHashMap keeps insertion order, so the serialized text below
    // is deterministic.
    LinkedHashMap<String, Integer> another = new LinkedHashMap<String, Integer>();
    another.put("firstKey", 1);
    another.put("secondKey", 2);
    ArrayList<Object> struct = new ArrayList<Object>();
    struct.add(Integer.valueOf(234));
    struct.add(bye);
    struct.add(another);
    Properties schema = new Properties();
    schema.setProperty(serdeConstants.SERIALIZATION_FORMAT, org.apache.hadoop.hive.serde2.thrift.TCTLSeparatedProtocol.class.getName());
    schema.setProperty(org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.META_TABLE_NAME, "test");
    schema.setProperty(serdeConstants.SERIALIZATION_DDL, "struct test { i32 hello, list<string> bye, map<string,i32> another}");
    schema.setProperty(serdeConstants.SERIALIZATION_LIB, new DynamicSerDe().getClass().toString());
    // Delimiters are given as decimal character codes: 9 = tab (fields),
    // 1 = ^A (collection elements), 2 = ^B (lines), 4 = ^D (map key/value).
    schema.setProperty(serdeConstants.FIELD_DELIM, "9");
    schema.setProperty(serdeConstants.COLLECTION_DELIM, "1");
    schema.setProperty(serdeConstants.LINE_DELIM, "2");
    schema.setProperty(serdeConstants.MAPKEY_DELIM, "4");
    DynamicSerDe serde = new DynamicSerDe();
    serde.initialize(new Configuration(), schema);
    // The field delimiter "9" must have been translated to a tab character.
    TCTLSeparatedProtocol prot = (TCTLSeparatedProtocol) serde.oprot_;
    assertEquals("\u0009", prot.getPrimarySeparator());
    ObjectInspector oi = serde.getObjectInspector();
    // Try to serialize
    BytesWritable bytes = (BytesWritable) serde.serialize(struct, oi);
    hexString(bytes);
    // Expected layout: int TAB list-elements(^A) TAB map entries with ^D
    // between each key and value and ^A between entries.
    String compare = "234" + "\u0009" + "firstString" + "\u0001" + "secondString" + "\u0009" + "firstKey" + "\u0004" + "1" + "\u0001" + "secondKey" + "\u0004" + "2";
    System.out.println("bytes in text =" + new String(bytes.get(), 0, bytes.getSize()) + ">");
    System.out.println("compare to =" + compare + ">");
    assertEquals(compare, new String(bytes.get(), 0, bytes.getSize()));
    // Try to deserialize
    Object o = serde.deserialize(bytes);
    System.out.println("o class = " + o.getClass());
    List<?> olist = (List<?>) o;
    System.out.println("o size = " + olist.size());
    System.out.println("o[0] class = " + olist.get(0).getClass());
    System.out.println("o[1] class = " + olist.get(1).getClass());
    System.out.println("o[2] class = " + olist.get(2).getClass());
    System.out.println("o = " + o);
    // Expected first, actual second (the original had the arguments
    // reversed, inconsistent with the sibling testDynamicSerDe).
    assertEquals(struct, o);
  } catch (Throwable e) {
    e.printStackTrace();
    throw e;
  }
}
Aggregations