Use of org.apache.hadoop.hive.serde2.lazy.LazyStruct in the Apache Hive project.
Class ColumnarSerDe, method initialize.
/**
 * Initializes this SerDe: builds the serde parameters, the columnar struct
 * ObjectInspector and the cached {@link ColumnarStruct} row object.
 *
 * <p>When no configuration is present, or the configuration asks for all columns,
 * every column index {@code 0..size-1} is read; otherwise only the column IDs
 * reported by {@link ColumnProjectionUtils#getReadColumnIDs} are read.
 *
 * @param configuration job configuration handed to the parent initializer and the serde parameters
 * @param tableProperties table-level properties, forwarded to the parent initializer
 * @param partitionProperties partition-level properties, forwarded to the parent initializer
 * @throws SerDeException declared by the signature; raised by the calls made here on failure
 */
@Override
public void initialize(Configuration configuration, Properties tableProperties, Properties partitionProperties) throws SerDeException {
  super.initialize(configuration, tableProperties, partitionProperties);
  // NOTE(review): `properties` is not assigned in this method; presumably populated by super.initialize — confirm.
  serdeParams = new LazySerDeParameters(configuration, properties, getClass().getName());

  // Build the field ObjectInspectors. For now the columnar object shares
  // the ObjectInspector used by LazyStruct.
  cachedObjectInspector = LazyFactory.createColumnarStructInspector(
      serdeParams.getColumnNames(), serdeParams.getColumnTypes(), serdeParams);

  final int columnCount = serdeParams.getColumnTypes().size();
  final List<Integer> columnsToRead;
  if (this.configuration.isPresent() && !ColumnProjectionUtils.isReadAllColumns(this.configuration.get())) {
    // A projection was requested: read only the listed column IDs.
    columnsToRead = ColumnProjectionUtils.getReadColumnIDs(this.configuration.get());
  } else {
    // No configuration, or "read all columns": enumerate every column index.
    columnsToRead = new ArrayList<Integer>();
    for (int columnId = 0; columnId < columnCount; columnId++) {
      columnsToRead.add(columnId);
    }
  }

  cachedLazyStruct = new ColumnarStruct(cachedObjectInspector, columnsToRead, serdeParams.getNullSequence());
  super.initialize(columnCount);
}
Use of org.apache.hadoop.hive.serde2.lazy.LazyStruct in the Apache Hive project.
Class AvroLazyObjectInspector, method getStructFieldData.
/**
 * Returns the data stored in field {@code f} of the given row.
 *
 * <p>Two row representations are handled:
 * <ul>
 *   <li>a {@link LazyStruct}: the field is fetched by ID; a nested {@link LazyStruct} is
 *       run through {@code deserializeStruct}, a {@link LazyMap} has each of its
 *       {@link LazyStruct} values replaced in place by the deserialized value, and any
 *       other field value is returned untouched;</li>
 *   <li>a {@link List}: the element at the field ID is passed to {@code toLazyObject}
 *       together with the field's ObjectInspector.</li>
 * </ul>
 *
 * @param data the row object; may be {@code null}
 * @param f the field to extract
 * @return the field data, or {@code null} when {@code data} is {@code null}, when the field ID
 *         is beyond the end of the list, or when the list element is {@code null}
 * @throws IllegalArgumentException if {@code data} is neither a {@link LazyStruct} nor a {@link List}
 */
@SuppressWarnings("unchecked")
@Override
public Object getStructFieldData(Object data, StructField f) {
  if (data == null) {
    return null;
  }
  int fieldID = f.getFieldID();
  if (LOG.isDebugEnabled()) {
    LOG.debug("Getting struct field data for field: [" + f.getFieldName() + "] on data [" + data.getClass() + "]");
  }
  if (data instanceof LazyStruct) {
    LazyStruct row = (LazyStruct) data;
    // get the field out of struct
    Object rowField = row.getField(fieldID);
    if (rowField instanceof LazyStruct) {
      // instanceof already guarantees rowField is non-null here
      if (LOG.isDebugEnabled()) {
        LOG.debug("Deserializing struct [" + rowField.getClass() + "]");
      }
      return deserializeStruct(rowField, f.getFieldName());
    } else if (rowField instanceof LazyMap) {
      // We have found a map. Systematically deserialize the values of the map and return back the
      // map
      LazyMap lazyMap = (LazyMap) rowField;
      for (Entry<Object, Object> entry : lazyMap.getMap().entrySet()) {
        Object value = entry.getValue();
        if (value instanceof LazyStruct) {
          // Replace the value through the entry itself rather than calling put() on the
          // map while its entry set is being iterated.
          entry.setValue(deserializeStruct(value, f.getFieldName()));
        }
      }
      if (LOG.isDebugEnabled()) {
        LOG.debug("Returning a lazy map for field [" + f.getFieldName() + "]");
      }
      return lazyMap;
    } else {
      if (LOG.isDebugEnabled()) {
        LOG.debug("Returning [" + rowField + "] for field [" + f.getFieldName() + "]");
      }
      // Just return the object. We need no further operation on it
      return rowField;
    }
  } else {
    // Otherwise the row is expected to already be a list of field objects that hive can
    // operate on; anything else is rejected.
    if (!(data instanceof List)) {
      throw new IllegalArgumentException("data should be an instance of list");
    }
    List<Object> fields = (List<Object>) data;
    if (!(fieldID < fields.size())) {
      return null;
    }
    // lookup the field corresponding to the given field ID and return
    Object field = fields.get(fieldID);
    if (field == null) {
      return null;
    }
    // convert to a lazy object and return
    return toLazyObject(field, f.getFieldObjectInspector());
  }
}
Use of org.apache.hadoop.hive.serde2.lazy.LazyStruct in the Apache Hive project.
Class AvroLazyObjectInspector, method deserializeStruct.
/**
 * Deserialize the given struct object.
 *
 * <p>The raw bytes of the struct are read as an Avro payload. The reader schema is either the
 * configured {@code readerSchema} or, when that is not set, the one returned by the
 * {@code schemaRetriever}. The writer schema comes from the {@code schemaRetriever} when one is
 * set, and otherwise from {@code retrieveSchemaFromBytes}.
 *
 * @param struct the object to deserialize; must be a {@link LazyStruct}
 * @param fieldName name of the field on which we are currently operating
 * @return a deserialized object that hive can further operate on, or {@code null} when the
 *         struct carries no bytes
 * @throws IllegalArgumentException if neither a reader schema nor a schema retriever is set, or
 *         if the payload is shorter than the offset reported by the schema retriever
 * @throws IllegalStateException if the schema retriever returns a {@code null} reader or writer
 *         schema
 * @throws AvroObjectInspectorException if something goes wrong during deserialization
 */
private Object deserializeStruct(Object struct, String fieldName) {
  byte[] data = ((LazyStruct) struct).getBytes();
  // Nothing to deserialize: bail out before allocating any Avro helper objects.
  if (data == null || data.length == 0) {
    return null;
  }
  if (readerSchema == null && schemaRetriever == null) {
    throw new IllegalArgumentException("reader schema or schemaRetriever must be set for field [" + fieldName + "]");
  }
  Schema ws;
  Schema rs;
  AvroGenericRecordWritable avroWritable = new AvroGenericRecordWritable();
  if (readerSchema == null) {
    // No reader schema configured; the guard above guarantees schemaRetriever is non-null.
    int offset = schemaRetriever.getOffset();
    if (data.length < offset) {
      throw new IllegalArgumentException("Data size cannot be less than [" + offset + "]. Found [" + data.length + "]");
    }
    rs = schemaRetriever.retrieveReaderSchema(data);
    if (rs == null) {
      // still nothing, Raise exception
      throw new IllegalStateException("A valid reader schema could not be retrieved either directly or from the schema retriever for field [" + fieldName + "]");
    }
    ws = schemaRetriever.retrieveWriterSchema(data);
    if (ws == null) {
      throw new IllegalStateException("Null writer schema retrieved from schemaRetriever for field [" + fieldName + "]");
    }
    if (LOG.isDebugEnabled()) {
      LOG.debug("Retrieved writer Schema: " + ws.toString());
      LOG.debug("Retrieved reader Schema: " + rs.toString());
    }
    try {
      // hand over the offset reported by the schema retriever together with the payload
      avroWritable.readFields(data, offset, data.length, ws, rs);
    } catch (IOException ioe) {
      throw new AvroObjectInspectorException("Error deserializing avro payload", ioe);
    }
  } else {
    // a reader schema was provided
    if (schemaRetriever != null) {
      // a schema retriever has been provided as well. Attempt to read the write schema from the
      // retriever
      ws = schemaRetriever.retrieveWriterSchema(data);
      if (ws == null) {
        throw new IllegalStateException("Null writer schema retrieved from schemaRetriever for field [" + fieldName + "]");
      }
    } else {
      // attempt retrieving the schema from the data
      ws = retrieveSchemaFromBytes(data);
    }
    rs = readerSchema;
    try {
      avroWritable.readFields(data, ws, rs);
    } catch (IOException ioe) {
      throw new AvroObjectInspectorException("Error deserializing avro payload", ioe);
    }
  }
  try {
    // Column names and types are derived from the reader schema.
    AvroObjectInspectorGenerator oiGenerator = new AvroObjectInspectorGenerator(rs);
    AvroDeserializer deserializer = new AvroDeserializer();
    return deserializer.deserialize(oiGenerator.getColumnNames(), oiGenerator.getColumnTypes(), avroWritable, rs);
  } catch (SerDeException se) {
    throw new AvroObjectInspectorException("Error deserializing avro payload", se);
  }
}
Use of org.apache.hadoop.hive.serde2.lazy.LazyStruct in the Apache Hive project.
Class VerifyLazy, method lazyCompare.
/**
 * Verifies that a deserialized (possibly lazy) object matches the expected object for the
 * given type.
 *
 * <p>Dispatch order:
 * <ol>
 *   <li>{@code null} handling: both {@code null} is a match, exactly one {@code null} is a
 *       mismatch;</li>
 *   <li>a {@link Writable} is compared with {@code equals};</li>
 *   <li>a {@link LazyPrimitive} is compared per primitive category against the expected
 *       writable;</li>
 *   <li>lists, maps, structs and unions (lazy, lazy-binary or plain) are delegated to
 *       {@code lazyCompareList}, {@code lazyCompareMap}, {@code lazyCompareStruct} and
 *       {@code lazyCompareUnion}, whose result is returned;</li>
 *   <li>anything else is reported on {@code System.err} as not implemented and treated as a
 *       match.</li>
 * </ol>
 *
 * @param typeInfo type of the value being compared
 * @param lazyObject the deserialized object under test; may be {@code null}
 * @param expectedObject the expected value; may be {@code null}
 * @return {@code true} when the scalar comparison succeeds, or the result of the delegated
 *         complex-type comparison
 * @throws RuntimeException on any mismatch or unexpected lazy object type
 * @throws Error if the primitive category is not one of the handled categories
 */
public static boolean lazyCompare(TypeInfo typeInfo, Object lazyObject, Object expectedObject) {
  if (expectedObject == null) {
    if (lazyObject != null) {
      throw new RuntimeException("Expected object is null but object is not null " + lazyObject.toString() + " typeInfo " + typeInfo.toString());
    }
    return true;
  } else if (lazyObject == null) {
    throw new RuntimeException("Expected object is not null \"" + expectedObject.toString() + "\" typeInfo " + typeInfo.toString() + " but object is null");
  }
  if (lazyObject instanceof Writable) {
    if (!lazyObject.equals(expectedObject)) {
      throw new RuntimeException("Expected object " + expectedObject.toString() + " and actual object " + lazyObject.toString() + " is not equal typeInfo " + typeInfo.toString());
    }
    return true;
  }
  if (lazyObject instanceof LazyPrimitive) {
    Object primitiveObject = ((LazyPrimitive) lazyObject).getObject();
    PrimitiveTypeInfo primitiveTypeInfo = (PrimitiveTypeInfo) typeInfo;
    switch (primitiveTypeInfo.getPrimitiveCategory()) {
      case BOOLEAN: {
        if (!(primitiveObject instanceof LazyBoolean)) {
          throw new RuntimeException("Expected LazyBoolean");
        }
        boolean value = ((LazyBoolean) primitiveObject).getWritableObject().get();
        boolean expected = ((BooleanWritable) expectedObject).get();
        if (value != expected) {
          throw new RuntimeException("Boolean field mismatch (expected " + expected + " found " + value + ")");
        }
        break;
      }
      case BYTE: {
        if (!(primitiveObject instanceof LazyByte)) {
          throw new RuntimeException("Expected LazyByte");
        }
        byte value = ((LazyByte) primitiveObject).getWritableObject().get();
        byte expected = ((ByteWritable) expectedObject).get();
        if (value != expected) {
          throw new RuntimeException("Byte field mismatch (expected " + (int) expected + " found " + (int) value + ")");
        }
        break;
      }
      case SHORT: {
        if (!(primitiveObject instanceof LazyShort)) {
          throw new RuntimeException("Expected LazyShort");
        }
        short value = ((LazyShort) primitiveObject).getWritableObject().get();
        short expected = ((ShortWritable) expectedObject).get();
        if (value != expected) {
          throw new RuntimeException("Short field mismatch (expected " + expected + " found " + value + ")");
        }
        break;
      }
      case INT: {
        if (!(primitiveObject instanceof LazyInteger)) {
          throw new RuntimeException("Expected LazyInteger");
        }
        int value = ((LazyInteger) primitiveObject).getWritableObject().get();
        int expected = ((IntWritable) expectedObject).get();
        if (value != expected) {
          throw new RuntimeException("Int field mismatch (expected " + expected + " found " + value + ")");
        }
        break;
      }
      case LONG: {
        if (!(primitiveObject instanceof LazyLong)) {
          throw new RuntimeException("Expected LazyLong");
        }
        long value = ((LazyLong) primitiveObject).getWritableObject().get();
        long expected = ((LongWritable) expectedObject).get();
        if (value != expected) {
          throw new RuntimeException("Long field mismatch (expected " + expected + " found " + value + ")");
        }
        break;
      }
      case FLOAT: {
        if (!(primitiveObject instanceof LazyFloat)) {
          throw new RuntimeException("Expected LazyFloat");
        }
        float value = ((LazyFloat) primitiveObject).getWritableObject().get();
        float expected = ((FloatWritable) expectedObject).get();
        // NOTE(review): '!=' never considers NaN equal to NaN, so an expected NaN always
        // reports a mismatch — confirm this is intended.
        if (value != expected) {
          throw new RuntimeException("Float field mismatch (expected " + expected + " found " + value + ")");
        }
        break;
      }
      case DOUBLE: {
        if (!(primitiveObject instanceof LazyDouble)) {
          throw new RuntimeException("Expected LazyDouble");
        }
        double value = ((LazyDouble) primitiveObject).getWritableObject().get();
        double expected = ((DoubleWritable) expectedObject).get();
        // NOTE(review): same NaN caveat as the FLOAT case above.
        if (value != expected) {
          throw new RuntimeException("Double field mismatch (expected " + expected + " found " + value + ")");
        }
        break;
      }
      case STRING: {
        if (!(primitiveObject instanceof LazyString)) {
          throw new RuntimeException("Expected LazyString");
        }
        Text value = ((LazyString) primitiveObject).getWritableObject();
        Text expected = ((Text) expectedObject);
        if (!value.equals(expected)) {
          throw new RuntimeException("String field mismatch (expected '" + expected + "' found '" + value + "')");
        }
        break;
      }
      case CHAR: {
        if (!(primitiveObject instanceof LazyHiveChar)) {
          throw new RuntimeException("Expected LazyHiveChar");
        }
        HiveChar value = ((LazyHiveChar) primitiveObject).getWritableObject().getHiveChar();
        HiveChar expected = ((HiveCharWritable) expectedObject).getHiveChar();
        if (!value.equals(expected)) {
          throw new RuntimeException("HiveChar field mismatch (expected '" + expected + "' found '" + value + "')");
        }
        break;
      }
      case VARCHAR: {
        if (!(primitiveObject instanceof LazyHiveVarchar)) {
          throw new RuntimeException("Expected LazyHiveVarchar");
        }
        HiveVarchar value = ((LazyHiveVarchar) primitiveObject).getWritableObject().getHiveVarchar();
        HiveVarchar expected = ((HiveVarcharWritable) expectedObject).getHiveVarchar();
        if (!value.equals(expected)) {
          throw new RuntimeException("HiveVarchar field mismatch (expected '" + expected + "' found '" + value + "')");
        }
        break;
      }
      case DECIMAL: {
        if (!(primitiveObject instanceof LazyHiveDecimal)) {
          throw new RuntimeException("Expected LazyHiveDecimal");
        }
        HiveDecimal value = ((LazyHiveDecimal) primitiveObject).getWritableObject().getHiveDecimal();
        HiveDecimal expected = ((HiveDecimalWritable) expectedObject).getHiveDecimal();
        if (!value.equals(expected)) {
          // Precision and scale are only looked up to enrich the failure message.
          DecimalTypeInfo decimalTypeInfo = (DecimalTypeInfo) primitiveTypeInfo;
          int precision = decimalTypeInfo.getPrecision();
          int scale = decimalTypeInfo.getScale();
          throw new RuntimeException("Decimal field mismatch (expected " + expected.toString() + " found " + value.toString() + ") precision " + precision + ", scale " + scale);
        }
        break;
      }
      case DATE: {
        if (!(primitiveObject instanceof LazyDate)) {
          throw new RuntimeException("Expected LazyDate");
        }
        Date value = ((LazyDate) primitiveObject).getWritableObject().get();
        Date expected = ((DateWritableV2) expectedObject).get();
        if (!value.equals(expected)) {
          throw new RuntimeException("Date field mismatch (expected " + expected + " found " + value + ")");
        }
        break;
      }
      case TIMESTAMP: {
        if (!(primitiveObject instanceof LazyTimestamp)) {
          throw new RuntimeException("Expected LazyTimestamp");
        }
        Timestamp value = ((LazyTimestamp) primitiveObject).getWritableObject().getTimestamp();
        Timestamp expected = ((TimestampWritableV2) expectedObject).getTimestamp();
        if (!value.equals(expected)) {
          throw new RuntimeException("Timestamp field mismatch (expected " + expected + " found " + value + ")");
        }
        break;
      }
      case INTERVAL_YEAR_MONTH: {
        if (!(primitiveObject instanceof LazyHiveIntervalYearMonth)) {
          throw new RuntimeException("Expected LazyHiveIntervalYearMonth");
        }
        HiveIntervalYearMonth value = ((LazyHiveIntervalYearMonth) primitiveObject).getWritableObject().getHiveIntervalYearMonth();
        HiveIntervalYearMonth expected = ((HiveIntervalYearMonthWritable) expectedObject).getHiveIntervalYearMonth();
        if (!value.equals(expected)) {
          throw new RuntimeException("HiveIntervalYearMonth field mismatch (expected " + expected + " found " + value + ")");
        }
        break;
      }
      case INTERVAL_DAY_TIME: {
        if (!(primitiveObject instanceof LazyHiveIntervalDayTime)) {
          throw new RuntimeException("Expected LazyHiveIntervalDayTime");
        }
        HiveIntervalDayTime value = ((LazyHiveIntervalDayTime) primitiveObject).getWritableObject().getHiveIntervalDayTime();
        HiveIntervalDayTime expected = ((HiveIntervalDayTimeWritable) expectedObject).getHiveIntervalDayTime();
        if (!value.equals(expected)) {
          throw new RuntimeException("HiveIntervalDayTime field mismatch (expected " + expected + " found " + value + ")");
        }
        break;
      }
      case BINARY: {
        if (!(primitiveObject instanceof LazyBinary)) {
          throw new RuntimeException("Expected LazyBinary");
        }
        // Only the first getLength() bytes of each backing array are compared.
        BytesWritable bytesWritable = ((LazyBinary) primitiveObject).getWritableObject();
        byte[] value = Arrays.copyOfRange(bytesWritable.getBytes(), 0, bytesWritable.getLength());
        BytesWritable bytesWritableExpected = (BytesWritable) expectedObject;
        byte[] expected = Arrays.copyOfRange(bytesWritableExpected.getBytes(), 0, bytesWritableExpected.getLength());
        // Arrays.equals covers both the length check and the element-wise comparison.
        if (!Arrays.equals(value, expected)) {
          throw new RuntimeException("Byte Array field mismatch (expected " + Arrays.toString(expected) + " found " + Arrays.toString(value) + ")");
        }
        break;
      }
      default:
        throw new Error("Unknown primitive category " + primitiveTypeInfo.getPrimitiveCategory());
    }
  } else if (lazyObject instanceof LazyArray) {
    LazyArray lazyArray = (LazyArray) lazyObject;
    List<Object> list = lazyArray.getList();
    List<Object> expectedList = (List<Object>) expectedObject;
    ListTypeInfo listTypeInfo = (ListTypeInfo) typeInfo;
    if (list.size() != expectedList.size()) {
      throw new RuntimeException("SerDe deserialized list length does not match (list " + list.toString() + " list.size() " + list.size() + " expectedList " + expectedList.toString() + " expectedList.size() " + expectedList.size() + ")" + " elementTypeInfo " + listTypeInfo.getListElementTypeInfo().toString());
    }
    return lazyCompareList(listTypeInfo, list, expectedList);
  } else if (typeInfo instanceof ListTypeInfo) {
    // Not a LazyArray: either a LazyBinaryArray or an already-materialized List.
    List<Object> list;
    if (lazyObject instanceof LazyBinaryArray) {
      list = ((LazyBinaryArray) lazyObject).getList();
    } else {
      list = (List<Object>) lazyObject;
    }
    List<Object> expectedList = (List<Object>) expectedObject;
    if (list.size() != expectedList.size()) {
      throw new RuntimeException("SerDe deserialized list length does not match (list " + list.toString() + " list.size() " + list.size() + " expectedList " + expectedList.toString() + " expectedList.size() " + expectedList.size() + ")");
    }
    return lazyCompareList((ListTypeInfo) typeInfo, list, expectedList);
  } else if (lazyObject instanceof LazyMap) {
    LazyMap lazyMap = (LazyMap) lazyObject;
    Map<Object, Object> map = lazyMap.getMap();
    Map<Object, Object> expectedMap = (Map<Object, Object>) expectedObject;
    return lazyCompareMap((MapTypeInfo) typeInfo, map, expectedMap);
  } else if (typeInfo instanceof MapTypeInfo) {
    // Not a LazyMap: either a LazyBinaryMap or an already-materialized Map.
    Map<Object, Object> map;
    Map<Object, Object> expectedMap = (Map<Object, Object>) expectedObject;
    if (lazyObject instanceof LazyBinaryMap) {
      map = ((LazyBinaryMap) lazyObject).getMap();
    } else {
      map = (Map<Object, Object>) lazyObject;
    }
    return lazyCompareMap((MapTypeInfo) typeInfo, map, expectedMap);
  } else if (lazyObject instanceof LazyStruct) {
    LazyStruct lazyStruct = (LazyStruct) lazyObject;
    List<Object> fields = lazyStruct.getFieldsAsList();
    List<Object> expectedFields = (List<Object>) expectedObject;
    StructTypeInfo structTypeInfo = (StructTypeInfo) typeInfo;
    return lazyCompareStruct(structTypeInfo, fields, expectedFields);
  } else if (typeInfo instanceof StructTypeInfo) {
    // Not a LazyStruct: either a LazyBinaryStruct or an already-materialized field list.
    ArrayList<Object> fields;
    if (lazyObject instanceof LazyBinaryStruct) {
      fields = ((LazyBinaryStruct) lazyObject).getFieldsAsList();
    } else {
      fields = (ArrayList<Object>) lazyObject;
    }
    List<Object> expectedFields = (List<Object>) expectedObject;
    StructTypeInfo structTypeInfo = (StructTypeInfo) typeInfo;
    return lazyCompareStruct(structTypeInfo, fields, expectedFields);
  } else if (lazyObject instanceof LazyUnion) {
    LazyUnion union = (LazyUnion) lazyObject;
    StandardUnionObjectInspector.StandardUnion expectedUnion = (StandardUnionObjectInspector.StandardUnion) expectedObject;
    UnionTypeInfo unionTypeInfo = (UnionTypeInfo) typeInfo;
    return lazyCompareUnion(unionTypeInfo, union, expectedUnion);
  } else if (typeInfo instanceof UnionTypeInfo) {
    StandardUnionObjectInspector.StandardUnion expectedUnion = (StandardUnionObjectInspector.StandardUnion) expectedObject;
    UnionTypeInfo unionTypeInfo = (UnionTypeInfo) typeInfo;
    if (lazyObject instanceof LazyBinaryUnion) {
      return lazyCompareUnion(unionTypeInfo, (LazyBinaryUnion) lazyObject, expectedUnion);
    } else {
      return lazyCompareUnion(unionTypeInfo, (UnionObject) lazyObject, expectedUnion);
    }
  } else {
    // Unsupported combination: reported but deliberately not treated as a failure.
    System.err.println("Not implemented " + typeInfo.getClass().getName());
  }
  return true;
}
Use of org.apache.hadoop.hive.serde2.lazy.LazyStruct in the Apache Hive project.
Class TestHBaseSerDe, method deserializeAndSerializeHBaseValueStruct.
/**
 * Round-trips an HBase {@link Result} through the given SerDe and checks both directions.
 *
 * <p>Deserialization: every top-level field must be a non-null {@link LazyStruct} whose first
 * three sub-fields render as "A", "B" and "C", and the whole row must render as the expected
 * JSON string. Serialization: the row is serialized back and the resulting {@link Put} must
 * have the same string form as {@code p}.
 *
 * @param serDe the SerDe under test
 * @param r the HBase result to deserialize
 * @param p the {@link Put} the serialized row is expected to match
 * @throws SerDeException declared by the signature; raised by the SerDe calls on failure
 * @throws IOException declared by the signature
 */
private void deserializeAndSerializeHBaseValueStruct(HBaseSerDe serDe, Result r, Put p) throws SerDeException, IOException {
  StructObjectInspector soi = (StructObjectInspector) serDe.getObjectInspector();
  List<? extends StructField> fieldRefs = soi.getAllStructFieldRefs();
  Object row = serDe.deserialize(new ResultWritable(r));
  for (StructField fieldRef : fieldRefs) {
    Object fieldData = soi.getStructFieldData(row, fieldRef);
    assertNotNull(fieldData);
    if (!(fieldData instanceof LazyStruct)) {
      Assert.fail("fieldData should be an instance of LazyStruct");
    }
    LazyStruct struct = (LazyStruct) fieldData;
    // Expected value first, actual second, so failure messages read correctly.
    assertEquals("A", struct.getField(0).toString());
    assertEquals("B", struct.getField(1).toString());
    assertEquals("C", struct.getField(2).toString());
  }
  assertEquals("{\"key\":{\"col1\":\"A\",\"col2\":\"B\",\"col3\":\"C\"},\"astring\":{\"col1\":\"A\",\"col2\":\"B\",\"col3\":\"C\"}}", SerDeUtils.getJSONString(row, soi));
  // Now serialize
  Put put = ((PutWritable) serDe.serialize(row, soi)).getPut();
  assertEquals("Serialized put:", p.toString(), put.toString());
}
Aggregations