use of org.apache.hadoop.hive.serde2.columnar.LazyBinaryColumnarSerDe in project hive by apache.
the class TestLazyBinaryColumnarSerDe method testSerDeOuterNulls.
public void testSerDeOuterNulls() throws SerDeException {
StructObjectInspector oi = (StructObjectInspector) ObjectInspectorFactory.getReflectionObjectInspector(OuterStruct.class, ObjectInspectorOptions.JAVA);
String cols = ObjectInspectorUtils.getFieldNames(oi);
Properties props = new Properties();
props.setProperty(serdeConstants.LIST_COLUMNS, cols);
props.setProperty(serdeConstants.LIST_COLUMN_TYPES, ObjectInspectorUtils.getFieldTypes(oi));
LazyBinaryColumnarSerDe serde = new LazyBinaryColumnarSerDe();
SerDeUtils.initializeSerDe(serde, new Configuration(), props, null);
OuterStruct outerStruct = new OuterStruct();
BytesRefArrayWritable braw = (BytesRefArrayWritable) serde.serialize(outerStruct, oi);
ObjectInspector out_oi = serde.getObjectInspector();
Object out_o = serde.deserialize(braw);
if (0 != ObjectInspectorUtils.compare(outerStruct, oi, out_o, out_oi, new SimpleMapEqualComparer())) {
System.out.println("expected = " + SerDeUtils.getJSONString(outerStruct, oi));
System.out.println("actual = " + SerDeUtils.getJSONString(out_o, out_oi));
fail("Deserialized object does not compare");
}
}
use of org.apache.hadoop.hive.serde2.columnar.LazyBinaryColumnarSerDe in project hive by apache.
the class TestLazyBinaryColumnarSerDe method testSerDeInnerNulls.
public void testSerDeInnerNulls() throws SerDeException {
StructObjectInspector oi = (StructObjectInspector) ObjectInspectorFactory.getReflectionObjectInspector(OuterStruct.class, ObjectInspectorOptions.JAVA);
String cols = ObjectInspectorUtils.getFieldNames(oi);
Properties props = new Properties();
props.setProperty(serdeConstants.LIST_COLUMNS, cols);
props.setProperty(serdeConstants.LIST_COLUMN_TYPES, ObjectInspectorUtils.getFieldTypes(oi));
LazyBinaryColumnarSerDe serde = new LazyBinaryColumnarSerDe();
SerDeUtils.initializeSerDe(serde, new Configuration(), props, null);
OuterStruct outerStruct = new OuterStruct();
outerStruct.mByte = 1;
outerStruct.mShort = 2;
outerStruct.mInt = 3;
outerStruct.mLong = 4l;
outerStruct.mFloat = 5.01f;
outerStruct.mDouble = 6.001d;
outerStruct.mString = "seven";
outerStruct.mBA = new byte[] { '3' };
InnerStruct is1 = new InnerStruct(null, 9l);
InnerStruct is2 = new InnerStruct(10, null);
outerStruct.mArray = new ArrayList<InnerStruct>(2);
outerStruct.mArray.add(is1);
outerStruct.mArray.add(is2);
outerStruct.mMap = new HashMap<String, InnerStruct>();
outerStruct.mMap.put(null, new InnerStruct(13, 14l));
outerStruct.mMap.put(new String("fifteen"), null);
outerStruct.mStruct = new InnerStruct(null, null);
BytesRefArrayWritable braw = (BytesRefArrayWritable) serde.serialize(outerStruct, oi);
ObjectInspector out_oi = serde.getObjectInspector();
Object out_o = serde.deserialize(braw);
if (0 != ObjectInspectorUtils.compare(outerStruct, oi, out_o, out_oi, new SimpleMapEqualComparer())) {
System.out.println("expected = " + SerDeUtils.getJSONString(outerStruct, oi));
System.out.println("actual = " + SerDeUtils.getJSONString(out_o, out_oi));
fail("Deserialized object does not compare");
}
}
use of org.apache.hadoop.hive.serde2.columnar.LazyBinaryColumnarSerDe in project hive by apache.
the class TestLazyBinaryColumnarSerDe method testLazyBinaryColumnarSerDeWithEmptyBinary.
public void testLazyBinaryColumnarSerDeWithEmptyBinary() throws SerDeException {
StructObjectInspector oi = (StructObjectInspector) ObjectInspectorFactory.getReflectionObjectInspector(OuterStruct.class, ObjectInspectorOptions.JAVA);
String cols = ObjectInspectorUtils.getFieldNames(oi);
Properties props = new Properties();
props.setProperty(serdeConstants.LIST_COLUMNS, cols);
props.setProperty(serdeConstants.LIST_COLUMN_TYPES, ObjectInspectorUtils.getFieldTypes(oi));
LazyBinaryColumnarSerDe serde = new LazyBinaryColumnarSerDe();
SerDeUtils.initializeSerDe(serde, new Configuration(), props, null);
OuterStruct outerStruct = new OuterStruct();
outerStruct.mByte = 101;
outerStruct.mShort = 2002;
outerStruct.mInt = 3003;
outerStruct.mLong = 4004l;
outerStruct.mFloat = 5005.01f;
outerStruct.mDouble = 6006.001d;
outerStruct.mString = "";
outerStruct.mBA = new byte[] {};
outerStruct.mArray = new ArrayList<InnerStruct>();
outerStruct.mMap = new TreeMap<String, InnerStruct>();
outerStruct.mStruct = new InnerStruct(180018, 190019l);
try {
serde.serialize(outerStruct, oi);
} catch (RuntimeException re) {
assertEquals(re.getMessage(), "LazyBinaryColumnarSerde cannot serialize a non-null " + "zero length binary field. Consider using either LazyBinarySerde or ColumnarSerde.");
return;
}
assert false;
}
use of org.apache.hadoop.hive.serde2.columnar.LazyBinaryColumnarSerDe in project presto by prestodb.
the class RcFileTester method assertFileContentsOld.
private static <K extends LongWritable, V extends BytesRefArrayWritable> void assertFileContentsOld(Type type, TempFile tempFile, Format format, Iterable<?> expectedValues) throws Exception {
JobConf configuration = new JobConf(new Configuration(false));
configuration.set(READ_COLUMN_IDS_CONF_STR, "0");
configuration.setBoolean(READ_ALL_COLUMNS, false);
Properties schema = new Properties();
schema.setProperty(META_TABLE_COLUMNS, "test");
schema.setProperty(META_TABLE_COLUMN_TYPES, getJavaObjectInspector(type).getTypeName());
@SuppressWarnings("deprecation") Deserializer deserializer;
if (format == Format.BINARY) {
deserializer = new LazyBinaryColumnarSerDe();
} else {
deserializer = new ColumnarSerDe();
}
deserializer.initialize(configuration, schema);
configuration.set(SERIALIZATION_LIB, deserializer.getClass().getName());
InputFormat<K, V> inputFormat = new RCFileInputFormat<>();
RecordReader<K, V> recordReader = inputFormat.getRecordReader(new FileSplit(new Path(tempFile.getFile().getAbsolutePath()), 0, tempFile.getFile().length(), (String[]) null), configuration, NULL);
K key = recordReader.createKey();
V value = recordReader.createValue();
StructObjectInspector rowInspector = (StructObjectInspector) deserializer.getObjectInspector();
StructField field = rowInspector.getStructFieldRef("test");
Iterator<?> iterator = expectedValues.iterator();
while (recordReader.next(key, value)) {
Object expectedValue = iterator.next();
Object rowData = deserializer.deserialize(value);
Object actualValue = rowInspector.getStructFieldData(rowData, field);
actualValue = decodeRecordReaderValue(type, actualValue);
assertColumnValueEquals(type, actualValue, expectedValue);
}
assertFalse(iterator.hasNext());
}
use of org.apache.hadoop.hive.serde2.columnar.LazyBinaryColumnarSerDe in project hive by apache.
the class LazyBinarySerDe method serialize.
/**
* A recursive function that serialize an object to a byte buffer based on its
* object inspector.
*
* @param byteStream
* the byte stream storing the serialization data
* @param obj
* the object to serialize
* @param objInspector
* the object inspector
* @param skipLengthPrefix a boolean indicating whether length prefix is
* needed for list/map/struct
* @param warnedOnceNullMapKey a boolean indicating whether a warning
* has been issued once already when encountering null map keys
*/
public static void serialize(RandomAccessOutput byteStream, Object obj, ObjectInspector objInspector, boolean skipLengthPrefix, BooleanRef warnedOnceNullMapKey) throws SerDeException {
// do nothing for null object
if (null == obj) {
return;
}
switch(objInspector.getCategory()) {
case PRIMITIVE:
{
PrimitiveObjectInspector poi = (PrimitiveObjectInspector) objInspector;
switch(poi.getPrimitiveCategory()) {
case VOID:
{
return;
}
case BOOLEAN:
{
boolean v = ((BooleanObjectInspector) poi).get(obj);
byteStream.write((byte) (v ? 1 : 0));
return;
}
case BYTE:
{
ByteObjectInspector boi = (ByteObjectInspector) poi;
byte v = boi.get(obj);
byteStream.write(v);
return;
}
case SHORT:
{
ShortObjectInspector spoi = (ShortObjectInspector) poi;
short v = spoi.get(obj);
byteStream.write((byte) (v >> 8));
byteStream.write((byte) (v));
return;
}
case INT:
{
IntObjectInspector ioi = (IntObjectInspector) poi;
int v = ioi.get(obj);
LazyBinaryUtils.writeVInt(byteStream, v);
return;
}
case LONG:
{
LongObjectInspector loi = (LongObjectInspector) poi;
long v = loi.get(obj);
LazyBinaryUtils.writeVLong(byteStream, v);
return;
}
case FLOAT:
{
FloatObjectInspector foi = (FloatObjectInspector) poi;
int v = Float.floatToIntBits(foi.get(obj));
byteStream.write((byte) (v >> 24));
byteStream.write((byte) (v >> 16));
byteStream.write((byte) (v >> 8));
byteStream.write((byte) (v));
return;
}
case DOUBLE:
{
DoubleObjectInspector doi = (DoubleObjectInspector) poi;
LazyBinaryUtils.writeDouble(byteStream, doi.get(obj));
return;
}
case STRING:
{
StringObjectInspector soi = (StringObjectInspector) poi;
Text t = soi.getPrimitiveWritableObject(obj);
serializeText(byteStream, t, skipLengthPrefix);
return;
}
case CHAR:
{
HiveCharObjectInspector hcoi = (HiveCharObjectInspector) poi;
Text t = hcoi.getPrimitiveWritableObject(obj).getTextValue();
serializeText(byteStream, t, skipLengthPrefix);
return;
}
case VARCHAR:
{
HiveVarcharObjectInspector hcoi = (HiveVarcharObjectInspector) poi;
Text t = hcoi.getPrimitiveWritableObject(obj).getTextValue();
serializeText(byteStream, t, skipLengthPrefix);
return;
}
case BINARY:
{
BinaryObjectInspector baoi = (BinaryObjectInspector) poi;
BytesWritable bw = baoi.getPrimitiveWritableObject(obj);
int length = bw.getLength();
if (!skipLengthPrefix) {
LazyBinaryUtils.writeVInt(byteStream, length);
} else {
if (length == 0) {
throw new RuntimeException("LazyBinaryColumnarSerde cannot serialize a non-null zero " + "length binary field. Consider using either LazyBinarySerde or ColumnarSerde.");
}
}
byteStream.write(bw.getBytes(), 0, length);
return;
}
case DATE:
{
DateWritable d = ((DateObjectInspector) poi).getPrimitiveWritableObject(obj);
writeDateToByteStream(byteStream, d);
return;
}
case TIMESTAMP:
{
TimestampObjectInspector toi = (TimestampObjectInspector) poi;
TimestampWritable t = toi.getPrimitiveWritableObject(obj);
t.writeToByteStream(byteStream);
return;
}
case INTERVAL_YEAR_MONTH:
{
HiveIntervalYearMonthWritable intervalYearMonth = ((HiveIntervalYearMonthObjectInspector) poi).getPrimitiveWritableObject(obj);
intervalYearMonth.writeToByteStream(byteStream);
return;
}
case INTERVAL_DAY_TIME:
{
HiveIntervalDayTimeWritable intervalDayTime = ((HiveIntervalDayTimeObjectInspector) poi).getPrimitiveWritableObject(obj);
intervalDayTime.writeToByteStream(byteStream);
return;
}
case DECIMAL:
{
HiveDecimalObjectInspector bdoi = (HiveDecimalObjectInspector) poi;
HiveDecimalWritable t = bdoi.getPrimitiveWritableObject(obj);
if (t == null) {
return;
}
writeToByteStream(byteStream, t);
return;
}
default:
{
throw new RuntimeException("Unrecognized type: " + poi.getPrimitiveCategory());
}
}
}
case LIST:
{
ListObjectInspector loi = (ListObjectInspector) objInspector;
ObjectInspector eoi = loi.getListElementObjectInspector();
int byteSizeStart = 0;
int listStart = 0;
if (!skipLengthPrefix) {
// 1/ reserve spaces for the byte size of the list
// which is a integer and takes four bytes
byteSizeStart = byteStream.getLength();
byteStream.reserve(4);
listStart = byteStream.getLength();
}
// 2/ write the size of the list as a VInt
int size = loi.getListLength(obj);
LazyBinaryUtils.writeVInt(byteStream, size);
// 3/ write the null bytes
byte nullByte = 0;
for (int eid = 0; eid < size; eid++) {
// set the bit to 1 if an element is not null
if (null != loi.getListElement(obj, eid)) {
nullByte |= 1 << (eid % 8);
}
// if this is the last element
if (7 == eid % 8 || eid == size - 1) {
byteStream.write(nullByte);
nullByte = 0;
}
}
// 4/ write element by element from the list
for (int eid = 0; eid < size; eid++) {
serialize(byteStream, loi.getListElement(obj, eid), eoi, false, warnedOnceNullMapKey);
}
if (!skipLengthPrefix) {
// 5/ update the list byte size
int listEnd = byteStream.getLength();
int listSize = listEnd - listStart;
writeSizeAtOffset(byteStream, byteSizeStart, listSize);
}
return;
}
case MAP:
{
MapObjectInspector moi = (MapObjectInspector) objInspector;
ObjectInspector koi = moi.getMapKeyObjectInspector();
ObjectInspector voi = moi.getMapValueObjectInspector();
Map<?, ?> map = moi.getMap(obj);
int byteSizeStart = 0;
int mapStart = 0;
if (!skipLengthPrefix) {
// 1/ reserve spaces for the byte size of the map
// which is a integer and takes four bytes
byteSizeStart = byteStream.getLength();
byteStream.reserve(4);
mapStart = byteStream.getLength();
}
// 2/ write the size of the map which is a VInt
int size = map.size();
LazyBinaryUtils.writeVInt(byteStream, size);
// 3/ write the null bytes
int b = 0;
byte nullByte = 0;
for (Map.Entry<?, ?> entry : map.entrySet()) {
// set the bit to 1 if a key is not null
if (null != entry.getKey()) {
nullByte |= 1 << (b % 8);
} else if (warnedOnceNullMapKey != null) {
if (!warnedOnceNullMapKey.value) {
LOG.warn("Null map key encountered! Ignoring similar problems.");
}
warnedOnceNullMapKey.value = true;
}
b++;
// set the bit to 1 if a value is not null
if (null != entry.getValue()) {
nullByte |= 1 << (b % 8);
}
b++;
// or if this is the last key-value pair
if (0 == b % 8 || b == size * 2) {
byteStream.write(nullByte);
nullByte = 0;
}
}
// 4/ write key-value pairs one by one
for (Map.Entry<?, ?> entry : map.entrySet()) {
serialize(byteStream, entry.getKey(), koi, false, warnedOnceNullMapKey);
serialize(byteStream, entry.getValue(), voi, false, warnedOnceNullMapKey);
}
if (!skipLengthPrefix) {
// 5/ update the byte size of the map
int mapEnd = byteStream.getLength();
int mapSize = mapEnd - mapStart;
writeSizeAtOffset(byteStream, byteSizeStart, mapSize);
}
return;
}
case STRUCT:
case UNION:
{
int byteSizeStart = 0;
int typeStart = 0;
if (!skipLengthPrefix) {
// 1/ reserve spaces for the byte size of the struct
// which is a integer and takes four bytes
byteSizeStart = byteStream.getLength();
byteStream.reserve(4);
typeStart = byteStream.getLength();
}
if (ObjectInspector.Category.STRUCT.equals(objInspector.getCategory())) {
// 2/ serialize the struct
serializeStruct(byteStream, obj, (StructObjectInspector) objInspector, warnedOnceNullMapKey);
} else {
// 2/ serialize the union
serializeUnion(byteStream, obj, (UnionObjectInspector) objInspector, warnedOnceNullMapKey);
}
if (!skipLengthPrefix) {
// 3/ update the byte size of the struct
int typeEnd = byteStream.getLength();
int typeSize = typeEnd - typeStart;
writeSizeAtOffset(byteStream, byteSizeStart, typeSize);
}
return;
}
default:
{
throw new RuntimeException("Unrecognized type: " + objInspector.getCategory());
}
}
}
Aggregations