use of org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector in project hive by apache.
the class ColumnarSerDe method serialize.
/**
 * Serializes one row, writing every field into the shared serialize stream and
 * recording, per field, the offset and length of the bytes it produced.
 *
 * @param obj
 *          The row object
 * @param objInspector
 *          The ObjectInspector describing the row; its category must be STRUCT
 * @return The cached Writable ({@code serializeCache}) that this SerDe reuses
 * @throws SerDeException
 *           if the inspector is not a struct inspector, if the row has more
 *           fields than the declared schema, or if writing a field fails
 * @see AbstractSerDe#serialize(Object, ObjectInspector)
 */
@Override
public Writable serialize(Object obj, ObjectInspector objInspector) throws SerDeException {
  if (objInspector.getCategory() != Category.STRUCT) {
    throw new SerDeException(getClass().toString() + " can only serialize struct types, but we got: " + objInspector.getTypeName());
  }

  // Field inspectors and field values of the incoming row.
  StructObjectInspector rowInspector = (StructObjectInspector) objInspector;
  List<? extends StructField> rowFields = rowInspector.getAllStructFieldRefs();
  List<Object> rowValues = rowInspector.getStructFieldsDataAsList(obj);

  // Fields declared for this SerDe; stays null when no non-empty row type is configured.
  List<? extends StructField> declaredFields = null;
  if (serdeParams.getRowTypeInfo() != null && ((StructTypeInfo) serdeParams.getRowTypeInfo()).getAllStructFieldNames().size() > 0) {
    declaredFields = ((StructObjectInspector) getObjectInspector()).getAllStructFieldRefs();
  }

  try {
    // All fields are written back to back into one stream; each field entry is
    // then handed the stream's buffer together with its own offset and length.
    serializeStream.reset();
    serializedSize = 0;
    int fieldStart = 0;

    for (int col = 0; col < rowFields.size(); col++) {
      ObjectInspector fieldInspector = rowFields.get(col).getFieldObjectInspector();
      Object fieldValue = (rowValues == null) ? null : rowValues.get(col);

      if (declaredFields != null && col >= declaredFields.size()) {
        throw new SerDeException("Error: expecting " + declaredFields.size() + " but asking for field " + col + "\n" + "data=" + obj + "\n" + "tableType=" + serdeParams.getRowTypeInfo().toString() + "\n" + "dataType=" + TypeInfoUtils.getTypeInfoFromObjectInspector(objInspector));
      }

      // A non-primitive value is written as a JSON string when there is no
      // declared schema or the declared column is primitive; in every other
      // case the value is written in the delimited way with its own inspector.
      boolean writeAsJson = !fieldInspector.getCategory().equals(Category.PRIMITIVE)
          && (declaredFields == null
              || declaredFields.get(col).getFieldObjectInspector().getCategory().equals(Category.PRIMITIVE));

      Object valueToWrite = fieldValue;
      ObjectInspector inspectorToUse = fieldInspector;
      if (writeAsJson) {
        valueToWrite = SerDeUtils.getJSONString(fieldValue, fieldInspector);
        inspectorToUse = PrimitiveObjectInspectorFactory.javaStringObjectInspector;
      }
      LazySimpleSerDe.serialize(serializeStream, valueToWrite, inspectorToUse, serdeParams.getSeparators(), 1, serdeParams.getNullSequence(), serdeParams.isEscaped(), serdeParams.getEscapeChar(), serdeParams.getNeedsEscape());

      // Record where this field's bytes sit in the stream, then advance the offset.
      int streamEnd = serializeStream.getLength();
      field[col].set(serializeStream.getData(), fieldStart, streamEnd - fieldStart);
      fieldStart = serializeStream.getLength();
    }

    serializedSize = serializeStream.getLength();
    lastOperationSerialize = true;
    lastOperationDeserialize = false;
  } catch (IOException e) {
    throw new SerDeException(e);
  }
  return serializeCache;
}
use of org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector in project hive by apache.
the class LazySimpleSerDe method doSerialize.
/**
 * Serializes one row as a single delimited record: the fields are written in
 * order into the shared serialize stream, separated by the first configured
 * separator, and the result is copied into the cached Writable.
 *
 * @param obj
 *          The row object
 * @param objInspector
 *          The ObjectInspector describing the row; its category must be STRUCT
 * @return The cached Writable ({@code serializeCache}) holding the serialized row
 * @throws SerDeException
 *           if the inspector is not a struct inspector or the row has more
 *           fields than the declared schema
 * @see AbstractSerDe#serialize(Object, ObjectInspector)
 */
@Override
public Writable doSerialize(Object obj, ObjectInspector objInspector) throws SerDeException {
  if (objInspector.getCategory() != Category.STRUCT) {
    throw new SerDeException(getClass().toString() + " can only serialize struct types, but we got: " + objInspector.getTypeName());
  }

  // Field inspectors and field values of the incoming row.
  StructObjectInspector rowInspector = (StructObjectInspector) objInspector;
  List<? extends StructField> rowFields = rowInspector.getAllStructFieldRefs();
  List<Object> rowValues = rowInspector.getStructFieldsDataAsList(obj);

  // Fields declared for this SerDe; stays null when no non-empty row type is configured.
  List<? extends StructField> declaredFields = null;
  if (serdeParams.getRowTypeInfo() != null && ((StructTypeInfo) serdeParams.getRowTypeInfo()).getAllStructFieldNames().size() > 0) {
    declaredFields = ((StructObjectInspector) getObjectInspector()).getAllStructFieldRefs();
  }

  serializeStream.reset();
  serializedSize = 0;

  for (int col = 0; col < rowFields.size(); col++) {
    // Every field except the first is preceded by the top-level separator.
    if (col != 0) {
      serializeStream.write(serdeParams.getSeparators()[0]);
    }

    ObjectInspector fieldInspector = rowFields.get(col).getFieldObjectInspector();
    Object fieldValue = (rowValues == null) ? null : rowValues.get(col);

    if (declaredFields != null && col >= declaredFields.size()) {
      throw new SerDeException("Error: expecting " + declaredFields.size() + " but asking for field " + col + "\n" + "data=" + obj + "\n" + "tableType=" + serdeParams.getRowTypeInfo().toString() + "\n" + "dataType=" + TypeInfoUtils.getTypeInfoFromObjectInspector(objInspector));
    }

    serializeField(serializeStream, fieldValue, fieldInspector, serdeParams);
  }

  // TODO: The copy of data is unnecessary, but there is no work-around
  // since we cannot directly set the private byte[] field inside Text.
  int writtenLength = serializeStream.getLength();
  serializeCache.set(serializeStream.getData(), 0, writtenLength);
  serializedSize = serializeStream.getLength();
  lastOperationSerialize = true;
  lastOperationDeserialize = false;
  return serializeCache;
}
use of org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector in project hive by apache.
the class TestLazyArrayMapStruct method testNestedinArrayAtLevel.
/**
 * Round-trips a single-column row whose column type is the inner type selected
 * by {@code dtype}, wrapped in {@code nestingLevel - 1} levels of
 * {@code array<...>}. The row bytes are built with the separator at index
 * {@code nestingLevel} (and, for maps, also {@code nestingLevel + 1}), parsed
 * through a {@link LazyStruct}, checked element by element, and finally
 * serialized again and compared with the input bytes.
 *
 * @param nestingLevel
 *          index into the SerDe separator array used for the innermost value;
 *          also determines how many array levels wrap the inner type
 * @param dtype
 *          category of the innermost type: LIST, MAP, STRUCT or UNION; any
 *          other category fails the test
 * @param tableProp
 *          table properties used to initialize the SerDe; the "columns" and
 *          "columns.types" entries are overwritten by this method
 * @throws SerDeException
 *           if SerDe initialization or serialization fails
 */
private void testNestedinArrayAtLevel(int nestingLevel, ObjectInspector.Category dtype, Properties tableProp) throws SerDeException {
  //create type with nestingLevel levels of nesting
  //set inner schema for dtype
  String inSchema = null;
  switch(dtype) {
    case LIST:
      inSchema = "array<tinyint>";
      break;
    case MAP:
      inSchema = "map<string,int>";
      break;
    case STRUCT:
      inSchema = "struct<s:string,i:tinyint>";
      break;
    case UNION:
      inSchema = "uniontype<string,tinyint>";
      break;
    default:
      fail("type not supported by test case");
  }
  // Wrap the inner type in (nestingLevel - 1) array levels.
  StringBuilder schema = new StringBuilder(inSchema);
  for (int i = 0; i < nestingLevel - 1; i++) {
    schema.insert(0, "array<");
    schema.append(">");
  }
  System.err.println("Testing nesting level " + nestingLevel + ". Using schema " + schema);
  // Create the SerDe
  LazySimpleSerDe serDe = new LazySimpleSerDe();
  Configuration conf = new Configuration();
  tableProp.setProperty("columns", "narray");
  tableProp.setProperty("columns.types", schema.toString());
  SerDeUtils.initializeSerDe(serDe, conf, tableProp, null);
  LazySerDeParameters serdeParams = new LazySerDeParameters(conf, tableProp, LazySimpleSerDe.class.getName());
  //create the serialized string for type
  byte[] separators = serdeParams.getSeparators();
  System.err.println("Using separator " + (char) separators[nestingLevel]);
  byte[] serializedRow = null;
  switch(dtype) {
    case LIST:
      serializedRow = new byte[] { '8', separators[nestingLevel], '9' };
      break;
    case MAP:
      // Entries are split by the separator at nestingLevel, key from value by the next one.
      byte kvSep = separators[nestingLevel + 1];
      byte kvPairSep = separators[nestingLevel];
      serializedRow = new byte[] { '1', kvSep, '1', kvPairSep, '2', kvSep, '2' };
      break;
    case STRUCT:
      serializedRow = new byte[] { '8', separators[nestingLevel], '9' };
      break;
    case UNION:
      // First byte is the union tag ('0'), followed by the value for that tag.
      serializedRow = new byte[] { '0', separators[nestingLevel], '9' };
      break;
    default:
      fail("type not supported by test case");
  }
  //create LazyStruct with serialized string with expected separators
  StructObjectInspector oi = (StructObjectInspector) serDe.getObjectInspector();
  LazyStruct struct = (LazyStruct) LazyFactory.createLazyObject(oi);
  TestLazyPrimitive.initLazyObject(struct, serializedRow, 0, serializedRow.length);
  //Get fields out of the lazy struct and check if they match expected
  // results
  //Get first level array
  LazyArray array = (LazyArray) struct.getField(0);
  // Descend through the remaining outer arrays (always element 0) until
  // reaching the array that directly holds the inner value.
  for (int i = 0; i < nestingLevel - 2; i++) {
    array = (LazyArray) array.getListElementObject(0);
  }
  //verify the serialized format for dtype
  switch(dtype) {
    case LIST:
      LazyArray array1 = (LazyArray) array.getListElementObject(0);
      //check elements of the innermost array
      assertEquals(2, array1.getListLength());
      assertEquals(new ByteWritable((byte) 8), ((LazyByte) array1.getListElementObject(0)).getWritableObject());
      assertEquals(new ByteWritable((byte) 9), ((LazyByte) array1.getListElementObject(1)).getWritableObject());
      break;
    case MAP:
      LazyMap lazyMap = (LazyMap) array.getListElementObject(0);
      Map map = lazyMap.getMap();
      System.err.println(map);
      assertEquals(2, map.size());
      Iterator<Map.Entry<LazyString, LazyInteger>> it = map.entrySet().iterator();
      // Expected value first, actual second, matching the other branches.
      Entry<LazyString, LazyInteger> e1 = it.next();
      assertEquals(new Text(new byte[] { '1' }), e1.getKey().getWritableObject());
      assertEquals(new IntWritable(1), e1.getValue().getWritableObject());
      Entry<LazyString, LazyInteger> e2 = it.next();
      assertEquals(new Text(new byte[] { '2' }), e2.getKey().getWritableObject());
      assertEquals(new IntWritable(2), e2.getValue().getWritableObject());
      break;
    case STRUCT:
      LazyStruct innerStruct = (LazyStruct) array.getListElementObject(0);
      //check elements of the innermost struct
      assertEquals(2, innerStruct.getFieldsAsList().size());
      assertEquals(new Text(new byte[] { '8' }), ((LazyString) innerStruct.getField(0)).getWritableObject());
      assertEquals(new ByteWritable((byte) 9), ((LazyByte) innerStruct.getField(1)).getWritableObject());
      break;
    case UNION:
      LazyUnion lazyUnion = (LazyUnion) array.getListElementObject(0);
      //check elements of the innermost union
      assertEquals(new Text(new byte[] { '9' }), ((LazyString) lazyUnion.getField()).getWritableObject());
      break;
    default:
      fail("type not supported by test case");
  }
  //test serialization
  Text serializedText = (Text) serDe.serialize(struct.getObject(), serDe.getObjectInspector());
  // Text.getBytes() returns the backing buffer, which is only valid up to
  // getLength(); trim it so spare capacity cannot break the comparison.
  byte[] serializedBytes = java.util.Arrays.copyOf(serializedText.getBytes(), serializedText.getLength());
  org.junit.Assert.assertArrayEquals(serializedRow, serializedBytes);
}
use of org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector in project hive by apache.
the class TestLazyBinaryColumnarSerDe method testSerDeOuterNulls.
/**
 * Round-trips a freshly constructed {@code OuterStruct} (no fields assigned by
 * this test) through {@code LazyBinaryColumnarSerDe} and fails if the
 * deserialized object does not compare equal to the input.
 *
 * @throws SerDeException if SerDe initialization, serialization or deserialization fails
 */
public void testSerDeOuterNulls() throws SerDeException {
  // Derive the column names and types from the reflected Java class.
  StructObjectInspector inputInspector = (StructObjectInspector) ObjectInspectorFactory.getReflectionObjectInspector(OuterStruct.class, ObjectInspectorOptions.JAVA);
  String columnNames = ObjectInspectorUtils.getFieldNames(inputInspector);
  Properties tableProperties = new Properties();
  tableProperties.setProperty(serdeConstants.LIST_COLUMNS, columnNames);
  tableProperties.setProperty(serdeConstants.LIST_COLUMN_TYPES, ObjectInspectorUtils.getFieldTypes(inputInspector));

  LazyBinaryColumnarSerDe columnarSerDe = new LazyBinaryColumnarSerDe();
  SerDeUtils.initializeSerDe(columnarSerDe, new Configuration(), tableProperties, null);

  // Serialize, then read the row back through the SerDe's own inspector.
  OuterStruct input = new OuterStruct();
  BytesRefArrayWritable serialized = (BytesRefArrayWritable) columnarSerDe.serialize(input, inputInspector);
  ObjectInspector outputInspector = columnarSerDe.getObjectInspector();
  Object output = columnarSerDe.deserialize(serialized);

  int comparison = ObjectInspectorUtils.compare(input, inputInspector, output, outputInspector, new SimpleMapEqualComparer());
  if (comparison != 0) {
    System.out.println("expected = " + SerDeUtils.getJSONString(input, inputInspector));
    System.out.println("actual = " + SerDeUtils.getJSONString(output, outputInspector));
    fail("Deserialized object does not compare");
  }
}
use of org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector in project hive by apache.
the class TestLazyBinaryColumnarSerDe method testSerDeInnerNulls.
/**
 * Round-trips an {@code OuterStruct} whose scalar fields are populated but whose
 * nested values contain nulls (null members inside {@code InnerStruct}s, a null
 * map key and a null map value) through {@code LazyBinaryColumnarSerDe}, and
 * fails if the deserialized object does not compare equal to the input.
 *
 * @throws SerDeException if SerDe initialization, serialization or deserialization fails
 */
public void testSerDeInnerNulls() throws SerDeException {
  // Derive the column names and types from the reflected Java class.
  StructObjectInspector inputInspector = (StructObjectInspector) ObjectInspectorFactory.getReflectionObjectInspector(OuterStruct.class, ObjectInspectorOptions.JAVA);
  String columnNames = ObjectInspectorUtils.getFieldNames(inputInspector);
  Properties tableProperties = new Properties();
  tableProperties.setProperty(serdeConstants.LIST_COLUMNS, columnNames);
  tableProperties.setProperty(serdeConstants.LIST_COLUMN_TYPES, ObjectInspectorUtils.getFieldTypes(inputInspector));

  LazyBinaryColumnarSerDe columnarSerDe = new LazyBinaryColumnarSerDe();
  SerDeUtils.initializeSerDe(columnarSerDe, new Configuration(), tableProperties, null);

  // Scalar fields all carry values.
  OuterStruct input = new OuterStruct();
  input.mByte = 1;
  input.mShort = 2;
  input.mInt = 3;
  input.mLong = 4L;
  input.mFloat = 5.01f;
  input.mDouble = 6.001d;
  input.mString = "seven";
  input.mBA = new byte[] { '3' };

  // Array elements each have one null member.
  ArrayList<InnerStruct> innerList = new ArrayList<InnerStruct>(2);
  innerList.add(new InnerStruct(null, 9L));
  innerList.add(new InnerStruct(10, null));
  input.mArray = innerList;

  // Map holds one null key and one null value.
  HashMap<String, InnerStruct> innerMap = new HashMap<String, InnerStruct>();
  innerMap.put(null, new InnerStruct(13, 14L));
  innerMap.put(new String("fifteen"), null);
  input.mMap = innerMap;

  // Nested struct with both members null.
  input.mStruct = new InnerStruct(null, null);

  // Serialize, then read the row back through the SerDe's own inspector.
  BytesRefArrayWritable serialized = (BytesRefArrayWritable) columnarSerDe.serialize(input, inputInspector);
  ObjectInspector outputInspector = columnarSerDe.getObjectInspector();
  Object output = columnarSerDe.deserialize(serialized);

  int comparison = ObjectInspectorUtils.compare(input, inputInspector, output, outputInspector, new SimpleMapEqualComparer());
  if (comparison != 0) {
    System.out.println("expected = " + SerDeUtils.getJSONString(input, inputInspector));
    System.out.println("actual = " + SerDeUtils.getJSONString(output, outputInspector));
    fail("Deserialized object does not compare");
  }
}
Aggregations