Use of org.apache.hadoop.io.BytesWritable in project hive by apache.
From the class TestDynamicSerDe, method testDynamicSerDe.
public void testDynamicSerDe() throws Throwable {
  try {
    // Try to construct an object
    ArrayList<String> bye = new ArrayList<String>();
    bye.add("firstString");
    bye.add("secondString");
    HashMap<String, Integer> another = new HashMap<String, Integer>();
    another.put("firstKey", 1);
    another.put("secondKey", 2);
    ArrayList<Object> struct = new ArrayList<Object>();
    struct.add(Integer.valueOf(234));
    struct.add(bye);
    struct.add(another);
    struct.add(Integer.valueOf(-234));
    struct.add(Double.valueOf(1.0));
    struct.add(Double.valueOf(-2.5));
    // All protocols
    ArrayList<String> protocols = new ArrayList<String>();
    ArrayList<Boolean> isBinaries = new ArrayList<Boolean>();
    ArrayList<HashMap<String, String>> additionalParams = new ArrayList<HashMap<String, String>>();
    protocols.add(org.apache.hadoop.hive.serde2.thrift.TBinarySortableProtocol.class.getName());
    isBinaries.add(true);
    additionalParams.add(makeHashMap("serialization.sort.order", "++++++"));
    protocols.add(org.apache.hadoop.hive.serde2.thrift.TBinarySortableProtocol.class.getName());
    isBinaries.add(true);
    additionalParams.add(makeHashMap("serialization.sort.order", "------"));
    protocols.add(org.apache.thrift.protocol.TBinaryProtocol.class.getName());
    isBinaries.add(true);
    additionalParams.add(null);
    protocols.add(org.apache.thrift.protocol.TJSONProtocol.class.getName());
    isBinaries.add(false);
    additionalParams.add(null);
    // TSimpleJSONProtocol does not support deserialization.
    // protocols.add(org.apache.thrift.protocol.TSimpleJSONProtocol.class.getName());
    // isBinaries.add(false);
    // additionalParams.add(null);
    // TCTLSeparatedProtocol is not done yet.
    protocols.add(org.apache.hadoop.hive.serde2.thrift.TCTLSeparatedProtocol.class.getName());
    isBinaries.add(false);
    additionalParams.add(null);
    System.out.println("input struct = " + struct);
    for (int pp = 0; pp < protocols.size(); pp++) {
      String protocol = protocols.get(pp);
      boolean isBinary = isBinaries.get(pp);
      System.out.println("Testing protocol: " + protocol);
      Properties schema = new Properties();
      schema.setProperty(serdeConstants.SERIALIZATION_FORMAT, protocol);
      schema.setProperty(org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.META_TABLE_NAME, "test");
      schema.setProperty(serdeConstants.SERIALIZATION_DDL, "struct test { i32 _hello, list<string> 2bye, map<string,i32> another, i32 nhello, double d, double nd}");
      schema.setProperty(serdeConstants.SERIALIZATION_LIB, new DynamicSerDe().getClass().toString());
      HashMap<String, String> p = additionalParams.get(pp);
      if (p != null) {
        for (Entry<String, String> e : p.entrySet()) {
          schema.setProperty(e.getKey(), e.getValue());
        }
      }
      DynamicSerDe serde = new DynamicSerDe();
      serde.initialize(new Configuration(), schema);
      // Try getObjectInspector
      ObjectInspector oi = serde.getObjectInspector();
      System.out.println("TypeName = " + oi.getTypeName());
      // Try to serialize
      BytesWritable bytes = (BytesWritable) serde.serialize(struct, oi);
      System.out.println("bytes =" + hexString(bytes));
      if (!isBinary) {
        System.out.println("bytes in text =" + new String(bytes.get(), 0, bytes.getSize()));
      }
      // Try to deserialize
      Object o = serde.deserialize(bytes);
      System.out.println("o class = " + o.getClass());
      List<?> olist = (List<?>) o;
      System.out.println("o size = " + olist.size());
      System.out.println("o[0] class = " + olist.get(0).getClass());
      System.out.println("o[1] class = " + olist.get(1).getClass());
      System.out.println("o[2] class = " + olist.get(2).getClass());
      System.out.println("o = " + o);
      assertEquals(struct, o);
    }
  } catch (Throwable e) {
    e.printStackTrace();
    throw e;
  }
}
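The test calls two private helpers, makeHashMap and hexString, that are not part of the snippet. A minimal sketch of what they could look like is below; the names come from the calls above, but the bodies are assumptions rather than the actual implementations in TestDynamicSerDe.

// Assumed helper: folds alternating key/value arguments into a HashMap.
private HashMap<String, String> makeHashMap(String... params) {
  HashMap<String, String> r = new HashMap<String, String>();
  for (int i = 0; i < params.length; i += 2) {
    r.put(params[i], params[i + 1]);
  }
  return r;
}

// Assumed helper: renders the valid bytes of a BytesWritable as hex for logging.
private String hexString(BytesWritable bytes) {
  StringBuilder sb = new StringBuilder();
  for (int i = 0; i < bytes.getSize(); i++) {
    sb.append(String.format("%02x", bytes.get()[i] & 0xff));
  }
  return sb.toString();
}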
Use of org.apache.hadoop.io.BytesWritable in project hive by apache.
From the class MultiDelimitSerDe, method serializeNoEncode.
// This is basically the same as LazySimpleSerDe.serialize, except that we don't use
// Base64 to encode binary data, because we're using a printable string as the delimiter.
// Consider a row such as "strAQ==\1": str is a string, AQ== is the delimiter, and \1
// is the binary data.
private static void serializeNoEncode(ByteStream.Output out, Object obj, ObjectInspector objInspector, byte[] separators, int level, Text nullSequence, boolean escaped, byte escapeChar, boolean[] needsEscape) throws IOException, SerDeException {
  if (obj == null) {
    out.write(nullSequence.getBytes(), 0, nullSequence.getLength());
    return;
  }
  char separator;
  List<?> list;
  switch(objInspector.getCategory()) {
    case PRIMITIVE:
      PrimitiveObjectInspector oi = (PrimitiveObjectInspector) objInspector;
      if (oi.getPrimitiveCategory() == PrimitiveCategory.BINARY) {
        BytesWritable bw = ((BinaryObjectInspector) oi).getPrimitiveWritableObject(obj);
        byte[] toWrite = new byte[bw.getLength()];
        System.arraycopy(bw.getBytes(), 0, toWrite, 0, bw.getLength());
        out.write(toWrite, 0, toWrite.length);
      } else {
        LazyUtils.writePrimitiveUTF8(out, obj, oi, escaped, escapeChar, needsEscape);
      }
      return;
    case LIST:
      separator = (char) separators[level];
      ListObjectInspector loi = (ListObjectInspector) objInspector;
      list = loi.getList(obj);
      ObjectInspector eoi = loi.getListElementObjectInspector();
      if (list == null) {
        out.write(nullSequence.getBytes(), 0, nullSequence.getLength());
      } else {
        for (int i = 0; i < list.size(); i++) {
          if (i > 0) {
            out.write(separator);
          }
          serializeNoEncode(out, list.get(i), eoi, separators, level + 1, nullSequence, escaped, escapeChar, needsEscape);
        }
      }
      return;
    case MAP:
      separator = (char) separators[level];
      char keyValueSeparator = (char) separators[level + 1];
      MapObjectInspector moi = (MapObjectInspector) objInspector;
      ObjectInspector koi = moi.getMapKeyObjectInspector();
      ObjectInspector voi = moi.getMapValueObjectInspector();
      Map<?, ?> map = moi.getMap(obj);
      if (map == null) {
        out.write(nullSequence.getBytes(), 0, nullSequence.getLength());
      } else {
        boolean first = true;
        for (Map.Entry<?, ?> entry : map.entrySet()) {
          if (first) {
            first = false;
          } else {
            out.write(separator);
          }
          serializeNoEncode(out, entry.getKey(), koi, separators, level + 2, nullSequence, escaped, escapeChar, needsEscape);
          out.write(keyValueSeparator);
          serializeNoEncode(out, entry.getValue(), voi, separators, level + 2, nullSequence, escaped, escapeChar, needsEscape);
        }
      }
      return;
    case STRUCT:
      separator = (char) separators[level];
      StructObjectInspector soi = (StructObjectInspector) objInspector;
      List<? extends StructField> fields = soi.getAllStructFieldRefs();
      list = soi.getStructFieldsDataAsList(obj);
      if (list == null) {
        out.write(nullSequence.getBytes(), 0, nullSequence.getLength());
      } else {
        for (int i = 0; i < list.size(); i++) {
          if (i > 0) {
            out.write(separator);
          }
          serializeNoEncode(out, list.get(i), fields.get(i).getFieldObjectInspector(), separators, level + 1, nullSequence, escaped, escapeChar, needsEscape);
        }
      }
      return;
  }
  throw new RuntimeException("Unknown category type: " + objInspector.getCategory());
}
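To make the PRIMITIVE/BINARY branch concrete: the raw bytes of a BytesWritable are copied to the output stream as-is, with no Base64 step. Below is a minimal standalone sketch of that copy, using java.io.ByteArrayOutputStream as a stand-in for Hive's ByteStream.Output; the class name BinaryCopySketch and the sample bytes are invented for illustration.

import java.io.ByteArrayOutputStream;
import org.apache.hadoop.io.BytesWritable;

public class BinaryCopySketch {
  public static void main(String[] args) {
    BytesWritable bw = new BytesWritable(new byte[] {0x01, 0x02, 0x03});
    ByteArrayOutputStream out = new ByteArrayOutputStream();
    // Copy only the valid region of the backing array, exactly as the branch above does.
    byte[] toWrite = new byte[bw.getLength()];
    System.arraycopy(bw.getBytes(), 0, toWrite, 0, bw.getLength());
    out.write(toWrite, 0, toWrite.length);
    System.out.println(out.size()); // 3 raw bytes, not a Base64 string
  }
}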
Use of org.apache.hadoop.io.BytesWritable in project hive by apache.
From the class VectorUDFAdaptor, method setOutputCol.
private void setOutputCol(ColumnVector colVec, int i, Object value) {
  /* Depending on the output type, get the value, cast the result to the
   * correct type if needed, and assign the result into the output vector.
   */
  if (outputOI instanceof WritableStringObjectInspector) {
    BytesColumnVector bv = (BytesColumnVector) colVec;
    Text t;
    if (value instanceof String) {
      t = new Text((String) value);
    } else {
      t = ((WritableStringObjectInspector) outputOI).getPrimitiveWritableObject(value);
    }
    bv.setVal(i, t.getBytes(), 0, t.getLength());
  } else if (outputOI instanceof WritableHiveCharObjectInspector) {
    WritableHiveCharObjectInspector writableHiveCharObjectOI = (WritableHiveCharObjectInspector) outputOI;
    int maxLength = ((CharTypeInfo) writableHiveCharObjectOI.getTypeInfo()).getLength();
    BytesColumnVector bv = (BytesColumnVector) colVec;
    HiveCharWritable hiveCharWritable;
    if (value instanceof HiveCharWritable) {
      hiveCharWritable = ((HiveCharWritable) value);
    } else {
      hiveCharWritable = writableHiveCharObjectOI.getPrimitiveWritableObject(value);
    }
    Text t = hiveCharWritable.getTextValue();
    // In vector mode, we stored CHAR as unpadded.
    StringExpr.rightTrimAndTruncate(bv, i, t.getBytes(), 0, t.getLength(), maxLength);
  } else if (outputOI instanceof WritableHiveVarcharObjectInspector) {
    WritableHiveVarcharObjectInspector writableHiveVarcharObjectOI = (WritableHiveVarcharObjectInspector) outputOI;
    int maxLength = ((VarcharTypeInfo) writableHiveVarcharObjectOI.getTypeInfo()).getLength();
    BytesColumnVector bv = (BytesColumnVector) colVec;
    HiveVarcharWritable hiveVarcharWritable;
    if (value instanceof HiveVarcharWritable) {
      hiveVarcharWritable = ((HiveVarcharWritable) value);
    } else {
      hiveVarcharWritable = writableHiveVarcharObjectOI.getPrimitiveWritableObject(value);
    }
    Text t = hiveVarcharWritable.getTextValue();
    StringExpr.truncate(bv, i, t.getBytes(), 0, t.getLength(), maxLength);
  } else if (outputOI instanceof WritableIntObjectInspector) {
    LongColumnVector lv = (LongColumnVector) colVec;
    if (value instanceof Integer) {
      lv.vector[i] = (Integer) value;
    } else {
      lv.vector[i] = ((WritableIntObjectInspector) outputOI).get(value);
    }
  } else if (outputOI instanceof WritableLongObjectInspector) {
    LongColumnVector lv = (LongColumnVector) colVec;
    if (value instanceof Long) {
      lv.vector[i] = (Long) value;
    } else {
      lv.vector[i] = ((WritableLongObjectInspector) outputOI).get(value);
    }
  } else if (outputOI instanceof WritableDoubleObjectInspector) {
    DoubleColumnVector dv = (DoubleColumnVector) colVec;
    if (value instanceof Double) {
      dv.vector[i] = (Double) value;
    } else {
      dv.vector[i] = ((WritableDoubleObjectInspector) outputOI).get(value);
    }
  } else if (outputOI instanceof WritableFloatObjectInspector) {
    DoubleColumnVector dv = (DoubleColumnVector) colVec;
    if (value instanceof Float) {
      dv.vector[i] = (Float) value;
    } else {
      dv.vector[i] = ((WritableFloatObjectInspector) outputOI).get(value);
    }
  } else if (outputOI instanceof WritableShortObjectInspector) {
    LongColumnVector lv = (LongColumnVector) colVec;
    if (value instanceof Short) {
      lv.vector[i] = (Short) value;
    } else {
      lv.vector[i] = ((WritableShortObjectInspector) outputOI).get(value);
    }
  } else if (outputOI instanceof WritableByteObjectInspector) {
    LongColumnVector lv = (LongColumnVector) colVec;
    if (value instanceof Byte) {
      lv.vector[i] = (Byte) value;
    } else {
      lv.vector[i] = ((WritableByteObjectInspector) outputOI).get(value);
    }
  } else if (outputOI instanceof WritableTimestampObjectInspector) {
    TimestampColumnVector tv = (TimestampColumnVector) colVec;
    Timestamp ts;
    if (value instanceof Timestamp) {
      ts = (Timestamp) value;
    } else {
      ts = ((WritableTimestampObjectInspector) outputOI).getPrimitiveJavaObject(value);
    }
    tv.set(i, ts);
  } else if (outputOI instanceof WritableDateObjectInspector) {
    LongColumnVector lv = (LongColumnVector) colVec;
    Date ts;
    if (value instanceof Date) {
      ts = (Date) value;
    } else {
      ts = ((WritableDateObjectInspector) outputOI).getPrimitiveJavaObject(value);
    }
    long l = DateWritable.dateToDays(ts);
    lv.vector[i] = l;
  } else if (outputOI instanceof WritableBooleanObjectInspector) {
    LongColumnVector lv = (LongColumnVector) colVec;
    if (value instanceof Boolean) {
      lv.vector[i] = (Boolean) value ? 1 : 0;
    } else {
      lv.vector[i] = ((WritableBooleanObjectInspector) outputOI).get(value) ? 1 : 0;
    }
  } else if (outputOI instanceof WritableHiveDecimalObjectInspector) {
    DecimalColumnVector dcv = (DecimalColumnVector) colVec;
    if (value instanceof HiveDecimal) {
      dcv.set(i, (HiveDecimal) value);
    } else {
      HiveDecimal hd = ((WritableHiveDecimalObjectInspector) outputOI).getPrimitiveJavaObject(value);
      dcv.set(i, hd);
    }
  } else if (outputOI instanceof WritableBinaryObjectInspector) {
    BytesWritable bw = (BytesWritable) value;
    BytesColumnVector bv = (BytesColumnVector) colVec;
    bv.setVal(i, bw.getBytes(), 0, bw.getLength());
  } else {
    throw new RuntimeException("Unhandled object type " + outputOI.getTypeName() + " inspector class " + outputOI.getClass().getName() + " value class " + value.getClass().getName());
  }
}
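For the binary branch at the end, the sketch below shows, in isolation, how a BytesWritable payload is written into a BytesColumnVector slot. The class name BinaryOutputColSketch and the sample bytes are invented for illustration, and the null-handling the adaptor normally performs is omitted.

import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
import org.apache.hadoop.io.BytesWritable;

public class BinaryOutputColSketch {
  public static void main(String[] args) {
    BytesWritable bw = new BytesWritable(new byte[] {0x0A, 0x0B});
    BytesColumnVector bv = new BytesColumnVector();
    bv.initBuffer(); // allocate the backing buffer before setting values
    bv.setVal(0, bw.getBytes(), 0, bw.getLength()); // copy row 0's bytes into the vector
  }
}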
Use of org.apache.hadoop.io.BytesWritable in project hive by apache.
From the class HBaseUtils, method desierliazeDbNameTableNameFromPartitionKey.
private static List<String> desierliazeDbNameTableNameFromPartitionKey(byte[] key, Configuration conf) {
  StringBuffer names = new StringBuffer();
  names.append("dbName,tableName,");
  StringBuffer types = new StringBuffer();
  types.append("string,string,");
  BinarySortableSerDe serDe = new BinarySortableSerDe();
  Properties props = new Properties();
  props.setProperty(serdeConstants.LIST_COLUMNS, names.toString());
  props.setProperty(serdeConstants.LIST_COLUMN_TYPES, types.toString());
  try {
    serDe.initialize(conf, props);
    List deserializedkeys = ((List) serDe.deserialize(new BytesWritable(key))).subList(0, 2);
    List<String> keys = new ArrayList<>();
    for (int i = 0; i < deserializedkeys.size(); i++) {
      Object deserializedKey = deserializedkeys.get(i);
      if (deserializedKey == null) {
        throw new RuntimeException("Can't have a null dbname or tablename");
      } else {
        TypeInfo inputType = TypeInfoUtils.getTypeInfoFromTypeString("string");
        ObjectInspector inputOI = TypeInfoUtils.getStandardWritableObjectInspectorFromTypeInfo(inputType);
        Converter converter = ObjectInspectorConverters.getConverter(inputOI, PrimitiveObjectInspectorFactory.javaStringObjectInspector);
        keys.add((String) converter.convert(deserializedKey));
      }
    }
    return keys;
  } catch (SerDeException e) {
    throw new RuntimeException("Error when deserialize key", e);
  }
}
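The converter pattern in the loop above turns the writable values produced by BinarySortableSerDe into plain java Strings. Below is a standalone sketch of that same pattern, converting a Text value directly; the class name ConverterSketch and the sample value "default" are for illustration only.

import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters.Converter;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
import org.apache.hadoop.io.Text;

public class ConverterSketch {
  public static void main(String[] args) {
    // Build a writable-string inspector and convert a Text value to a java String,
    // mirroring the per-key conversion in the method above.
    TypeInfo inputType = TypeInfoUtils.getTypeInfoFromTypeString("string");
    ObjectInspector inputOI = TypeInfoUtils.getStandardWritableObjectInspectorFromTypeInfo(inputType);
    Converter converter = ObjectInspectorConverters.getConverter(inputOI,
        PrimitiveObjectInspectorFactory.javaStringObjectInspector);
    String dbName = (String) converter.convert(new Text("default"));
    System.out.println(dbName); // prints "default"
  }
}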
Use of org.apache.hadoop.io.BytesWritable in project hive by apache.
From the class PartitionKeyComparator, method compareTo.
@Override
public int compareTo(byte[] value, int offset, int length) {
  byte[] bytes = Arrays.copyOfRange(value, offset, offset + length);
  if (LOG.isDebugEnabled()) {
    LOG.debug("Get key " + new String(bytes));
  }
  BinarySortableSerDe serDe = new BinarySortableSerDe();
  List deserializedkeys = null;
  try {
    serDe.initialize(new Configuration(), serdeProps);
    deserializedkeys = ((List) serDe.deserialize(new BytesWritable(bytes))).subList(2, 2 + names.split(",").length);
  } catch (SerDeException e) {
    // don't bother with failed deserialization, continue with next key
    return 1;
  }
  for (int i = 0; i < ranges.size(); i++) {
    Range range = ranges.get(i);
    NativeRange nativeRange = nativeRanges.get(i);
    Comparable partVal = (Comparable) deserializedkeys.get(nativeRange.pos);
    if (LOG.isDebugEnabled()) {
      LOG.debug("Try to match range " + partVal + ", start " + nativeRange.start + ", end " + nativeRange.end);
    }
    if (range.start == null || range.start.inclusive && partVal.compareTo(nativeRange.start) >= 0 || !range.start.inclusive && partVal.compareTo(nativeRange.start) > 0) {
      if (range.end == null || range.end.inclusive && partVal.compareTo(nativeRange.end) <= 0 || !range.end.inclusive && partVal.compareTo(nativeRange.end) < 0) {
        continue;
      }
    }
    if (LOG.isDebugEnabled()) {
      LOG.debug("Fail to match range " + range.keyName + "-" + partVal + "[" + nativeRange.start + "," + nativeRange.end + "]");
    }
    return 1;
  }
  for (int i = 0; i < ops.size(); i++) {
    Operator op = ops.get(i);
    NativeOperator nativeOp = nativeOps.get(i);
    switch(op.type) {
      case LIKE:
        if (!deserializedkeys.get(nativeOp.pos).toString().matches(op.val)) {
          if (LOG.isDebugEnabled()) {
            LOG.debug("Fail to match operator " + op.keyName + "(" + deserializedkeys.get(nativeOp.pos) + ") LIKE " + nativeOp.val);
          }
          return 1;
        }
        break;
      case NOTEQUALS:
        if (nativeOp.val.equals(deserializedkeys.get(nativeOp.pos))) {
          if (LOG.isDebugEnabled()) {
            LOG.debug("Fail to match operator " + op.keyName + "(" + deserializedkeys.get(nativeOp.pos) + ")!=" + nativeOp.val);
          }
          return 1;
        }
        break;
    }
  }
  if (LOG.isDebugEnabled()) {
    LOG.debug("All conditions satisfied:" + deserializedkeys);
  }
  return 0;
}
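The comparator assumes serdeProps describes the layout the partition keys were written with: dbName and tableName first, followed by the partition key columns, which is why the deserialized list is sliced with subList(2, ...). Below is a hypothetical setup mirroring the HBaseUtils snippet above; the column name pkey1 and the class name SerdePropsSketch are invented for illustration.

import java.util.Properties;
import org.apache.hadoop.hive.serde.serdeConstants;

public class SerdePropsSketch {
  public static void main(String[] args) {
    // Column names and types for BinarySortableSerDe: db name, table name,
    // then one string-typed partition key column.
    Properties serdeProps = new Properties();
    serdeProps.setProperty(serdeConstants.LIST_COLUMNS, "dbName,tableName,pkey1");
    serdeProps.setProperty(serdeConstants.LIST_COLUMN_TYPES, "string,string,string");
    System.out.println(serdeProps);
  }
}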