Use of org.apache.hadoop.hive.serde2.thrift.TCTLSeparatedProtocol in the Apache Hive project.
Class TestDynamicSerDe, method testDynamicSerDe.
/**
 * Round-trips a single struct through DynamicSerDe once for every protocol
 * configuration listed below and asserts that the deserialized value equals
 * the input.
 *
 * @throws Throwable any failure raised by the serde or by an assertion; it is
 *         left to propagate so the test runner reports it
 */
public void testDynamicSerDe() throws Throwable {
  // The row to serialize; element order follows the DDL set on the schema below.
  ArrayList<String> bye = new ArrayList<String>();
  bye.add("firstString");
  bye.add("secondString");
  HashMap<String, Integer> another = new HashMap<String, Integer>();
  another.put("firstKey", 1);
  another.put("secondKey", 2);
  ArrayList<Object> struct = new ArrayList<Object>();
  struct.add(Integer.valueOf(234));
  struct.add(bye);
  struct.add(another);
  struct.add(Integer.valueOf(-234));
  struct.add(Double.valueOf(1.0));
  struct.add(Double.valueOf(-2.5));

  // Three parallel lists: entry i of each one describes a single protocol
  // configuration (class name, whether its output is binary, extra properties).
  ArrayList<String> protocols = new ArrayList<String>();
  ArrayList<Boolean> isBinaries = new ArrayList<Boolean>();
  ArrayList<HashMap<String, String>> additionalParams = new ArrayList<HashMap<String, String>>();

  // TBinarySortableProtocol, every column with sort order '+'.
  protocols.add(org.apache.hadoop.hive.serde2.thrift.TBinarySortableProtocol.class.getName());
  isBinaries.add(true);
  additionalParams.add(makeHashMap("serialization.sort.order", "++++++"));

  // TBinarySortableProtocol, every column with sort order '-'.
  protocols.add(org.apache.hadoop.hive.serde2.thrift.TBinarySortableProtocol.class.getName());
  isBinaries.add(true);
  additionalParams.add(makeHashMap("serialization.sort.order", "------"));

  protocols.add(org.apache.thrift.protocol.TBinaryProtocol.class.getName());
  isBinaries.add(true);
  additionalParams.add(null);

  protocols.add(org.apache.thrift.protocol.TJSONProtocol.class.getName());
  isBinaries.add(false);
  additionalParams.add(null);

  // TSimpleJSONProtocol does not support deserialization, so it is not exercised here.

  // TCTLSeparatedProtocol, with no additional properties.
  protocols.add(org.apache.hadoop.hive.serde2.thrift.TCTLSeparatedProtocol.class.getName());
  isBinaries.add(false);
  additionalParams.add(null);

  System.out.println("input struct = " + struct);

  for (int pp = 0; pp < protocols.size(); pp++) {
    String protocol = protocols.get(pp);
    boolean isBinary = isBinaries.get(pp);
    System.out.println("Testing protocol: " + protocol);

    Properties schema = new Properties();
    schema.setProperty(serdeConstants.SERIALIZATION_FORMAT, protocol);
    schema.setProperty(org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.META_TABLE_NAME, "test");
    schema.setProperty(serdeConstants.SERIALIZATION_DDL, "struct test { i32 _hello, list<string> 2bye, map<string,i32> another, i32 nhello, double d, double nd}");
    // Class.getName() gives the bare class name; Class.toString() would prefix it with "class ".
    schema.setProperty(serdeConstants.SERIALIZATION_LIB, DynamicSerDe.class.getName());

    // Apply the per-protocol extra properties, if any.
    HashMap<String, String> p = additionalParams.get(pp);
    if (p != null) {
      for (Entry<String, String> e : p.entrySet()) {
        schema.setProperty(e.getKey(), e.getValue());
      }
    }

    DynamicSerDe serde = new DynamicSerDe();
    serde.initialize(new Configuration(), schema);

    // Try getObjectInspector
    ObjectInspector oi = serde.getObjectInspector();
    System.out.println("TypeName = " + oi.getTypeName());

    // Try to serialize
    BytesWritable bytes = (BytesWritable) serde.serialize(struct, oi);
    System.out.println("bytes =" + hexString(bytes));
    if (!isBinary) {
      // Only the first getLength() bytes of the backing array are valid data.
      System.out.println("bytes in text =" + new String(bytes.getBytes(), 0, bytes.getLength()));
    }

    // Try to deserialize
    Object o = serde.deserialize(bytes);
    System.out.println("o class = " + o.getClass());
    List<?> olist = (List<?>) o;
    System.out.println("o size = " + olist.size());
    System.out.println("o[0] class = " + olist.get(0).getClass());
    System.out.println("o[1] class = " + olist.get(1).getClass());
    System.out.println("o[2] class = " + olist.get(2).getClass());
    System.out.println("o = " + o);
    assertEquals(struct, o);
  }
}
Use of org.apache.hadoop.hive.serde2.thrift.TCTLSeparatedProtocol in the Apache Hive project.
Class TestTCTLSeparatedProtocol, method test1ApacheLogFormat.
/**
 * Reads one Apache access-log line field by field through
 * TCTLSeparatedProtocol. A regex-oriented protocol would be the more general
 * tool for this format, but TCTLSeparatedProtocol is sufficient for this
 * sample.
 */
public void test1ApacheLogFormat() throws Exception {
  final String sample = "127.0.0.1 - frank [10/Oct/2000:13:55:36 -0700] \"GET /apache_pb.gif HTTP/1.0\" 200 2326";
  final byte[] rawLine = sample.getBytes();

  TMemoryBuffer buffer = new TMemoryBuffer(4096);
  buffer.write(rawLine, 0, rawLine.length);
  buffer.flush();

  TCTLSeparatedProtocol reader = new TCTLSeparatedProtocol(buffer, 4096);
  Properties props = new Properties();
  // The quote pattern is a shortcut: it accepts any two of the listed
  // characters as a pair, so "[ hello this is something to split [" would
  // also be treated as quoted.
  props.setProperty(serdeConstants.QUOTE_CHAR, "(\"|\\[|\\])");
  props.setProperty(serdeConstants.FIELD_DELIM, " ");
  props.setProperty(serdeConstants.SERIALIZATION_NULL_FORMAT, "-");
  reader.initialize(new Configuration(), props);

  reader.readStructBegin();

  // ip address
  assertEquals("127.0.0.1", nextLogString(reader));
  // identd: "-" is the configured null format
  assertNull(nextLogString(reader));
  // user
  assertEquals("frank", nextLogString(reader));
  // finishTime
  assertEquals("10/Oct/2000:13:55:36 -0700", nextLogString(reader));
  // requestLine
  assertEquals("GET /apache_pb.gif HTTP/1.0", nextLogString(reader));
  // returncode
  assertEquals(200, nextLogInt(reader));
  // return size
  assertEquals(2326, nextLogInt(reader));

  reader.readStructEnd();
}

/** Reads the next field of the current struct as a string. */
private static String nextLogString(TCTLSeparatedProtocol reader) throws Exception {
  reader.readFieldBegin();
  String value = reader.readString();
  reader.readFieldEnd();
  return value;
}

/** Reads the next field of the current struct as an i32. */
private static int nextLogInt(TCTLSeparatedProtocol reader) throws Exception {
  reader.readFieldBegin();
  int value = reader.readI32();
  reader.readFieldEnd();
  return value;
}
Use of org.apache.hadoop.hive.serde2.thrift.TCTLSeparatedProtocol in the Apache Hive project.
Class TestTCTLSeparatedProtocol, method testQuotedWrites.
/**
 * Writes a quoted string field followed by a two-element list field with
 * TCTLSeparatedProtocol, then reads the produced bytes back with a second
 * protocol instance initialized from the same properties.
 *
 * <p>The first value contains the field delimiter (a comma) inside the quote
 * characters; the read side is expected to return it as one field with the
 * quotes removed. Fields read past the written data are expected to be null.
 */
public void testQuotedWrites() throws Exception {
  TMemoryBuffer trans = new TMemoryBuffer(4096);
  TCTLSeparatedProtocol prot = new TCTLSeparatedProtocol(trans, 4096);
  Properties schema = new Properties();
  schema.setProperty(serdeConstants.QUOTE_CHAR, "\"");
  schema.setProperty(serdeConstants.FIELD_DELIM, ",");
  prot.initialize(new Configuration(), schema);

  String testStr = "\"hello, world!\"";

  // Write side: one string field, then one list field with two elements.
  prot.writeStructBegin(new TStruct());
  prot.writeFieldBegin(new TField());
  prot.writeString(testStr);
  prot.writeFieldEnd();
  prot.writeFieldBegin(new TField());
  prot.writeListBegin(new TList());
  prot.writeString("elem1");
  prot.writeString("elem2");
  prot.writeListEnd();
  prot.writeFieldEnd();
  prot.writeStructEnd();
  prot.writeString("\n");
  trans.flush();

  // Copy what was written into a fresh buffer and read it with a new protocol.
  byte[] b = new byte[4096];
  int len = trans.read(b, 0, b.length);
  trans = new TMemoryBuffer(4096);
  trans.write(b, 0, len);
  prot = new TCTLSeparatedProtocol(trans, 1024);
  prot.initialize(new Configuration(), schema);

  prot.readStructBegin();

  // The quoted string comes back without its quote characters.
  prot.readFieldBegin();
  final String firstRead = prot.readString();
  prot.readFieldEnd();
  testStr = testStr.replace("\"", "");
  assertEquals(testStr, firstRead);

  // The two-element list. assertEquals reports the actual value on failure
  // and does not throw a NullPointerException if a read returns null.
  prot.readFieldBegin();
  TList l = prot.readListBegin();
  assertEquals(2, l.size);
  assertEquals("elem1", prot.readString());
  assertEquals("elem2", prot.readString());
  prot.readListEnd();
  prot.readFieldEnd();

  // Should return nulls at the end.
  prot.readFieldBegin();
  assertNull(prot.readString());
  prot.readFieldEnd();

  // Should keep returning nulls at the end.
  prot.readFieldBegin();
  assertNull(prot.readString());
  prot.readFieldEnd();

  prot.readStructEnd();
}
Use of org.apache.hadoop.hive.serde2.thrift.TCTLSeparatedProtocol in the Apache Hive project.
Class TestDynamicSerDe, method testSkip.
/**
 * Serializes a three-field struct with DynamicSerDe over
 * TCTLSeparatedProtocol, checks the exact delimited text, then re-initializes
 * the serde with the list field marked {@code skip} and checks that the
 * skipped field deserializes as null while the other fields are unchanged.
 *
 * @throws Throwable any failure raised by the serde or by an assertion; it is
 *         left to propagate so the test runner reports it
 */
public void testSkip() throws Throwable {
  // The row to serialize. LinkedHashMap keeps the map entries in insertion
  // order, which the exact-text comparison below depends on.
  ArrayList<String> bye = new ArrayList<String>();
  bye.add("firstString");
  bye.add("secondString");
  LinkedHashMap<String, Integer> another = new LinkedHashMap<String, Integer>();
  another.put("firstKey", 1);
  another.put("secondKey", 2);
  ArrayList<Object> struct = new ArrayList<Object>();
  struct.add(Integer.valueOf(234));
  struct.add(bye);
  struct.add(another);

  Properties schema = new Properties();
  schema.setProperty(serdeConstants.SERIALIZATION_FORMAT, org.apache.hadoop.hive.serde2.thrift.TCTLSeparatedProtocol.class.getName());
  schema.setProperty(org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.META_TABLE_NAME, "test");
  schema.setProperty(serdeConstants.SERIALIZATION_DDL, "struct test { i32 hello, list<string> bye, map<string,i32> another}");
  // Class.getName() gives the bare class name; Class.toString() would prefix it with "class ".
  schema.setProperty(serdeConstants.SERIALIZATION_LIB, DynamicSerDe.class.getName());
  // Delimiters are configured as numeric strings. The assertions below expect
  // them to surface as the characters with those code points:
  // 9 (tab) between fields, 1 between collection items, 4 between map key and value.
  schema.setProperty(serdeConstants.FIELD_DELIM, "9");
  schema.setProperty(serdeConstants.COLLECTION_DELIM, "1");
  schema.setProperty(serdeConstants.LINE_DELIM, "2");
  schema.setProperty(serdeConstants.MAPKEY_DELIM, "4");

  DynamicSerDe serde = new DynamicSerDe();
  serde.initialize(new Configuration(), schema);

  TCTLSeparatedProtocol prot = (TCTLSeparatedProtocol) serde.oprot_;
  assertEquals("\t", prot.getPrimarySeparator());

  ObjectInspector oi = serde.getObjectInspector();

  // Try to serialize
  BytesWritable bytes = (BytesWritable) serde.serialize(struct, oi);
  System.out.println("bytes =" + hexString(bytes));

  // Control characters are written as escapes so they stay visible in the
  // source and survive editors and copy/paste.
  String compare = "234" + "\t" + "firstString" + "\u0001" + "secondString" + "\t"
      + "firstKey" + "\u0004" + "1" + "\u0001" + "secondKey" + "\u0004" + "2";
  // Only the first getLength() bytes of the backing array are valid data.
  String serialized = new String(bytes.getBytes(), 0, bytes.getLength());
  System.out.println("bytes in text =" + serialized + ">");
  System.out.println("compare to =" + compare + ">");
  assertEquals(compare, serialized);

  // Same layout, but the list field is now declared as skipped.
  schema.setProperty(serdeConstants.SERIALIZATION_DDL, "struct test { i32 hello, skip list<string> bye, map<string,i32> another}");
  serde.initialize(new Configuration(), schema);

  // Try to deserialize
  Object o = serde.deserialize(bytes);
  System.out.println("o class = " + o.getClass());
  List<?> olist = (List<?>) o;
  System.out.println("o size = " + olist.size());
  System.out.println("o = " + o);

  assertEquals(null, olist.get(1));

  // Null out the skipped field in the input so the whole rows can be compared.
  struct.set(1, null);
  assertEquals(struct, o);
}
Aggregations