Use of org.apache.hadoop.hive.serde2.objectinspector.StandardUnionObjectInspector.StandardUnion in the Apache Hive project.
Class AvroLazyObjectInspector, method toLazyUnionObject.
/**
 * Converts a {@link StandardUnion} into a {@link LazyUnion} whose field has been
 * converted through {@code toLazyObject}.
 *
 * @param obj the union value to convert; it is cast to {@link StandardUnion}
 * @param objectInspector inspector describing the union; must be a {@link LazyUnionObjectInspector}
 * @return a new {@link LazyUnion} carrying the original tag and the converted field, or
 *         {@code null} when {@code obj}, its field, or the converted field is {@code null}
 * @throws IllegalArgumentException if {@code objectInspector} is not a {@link LazyUnionObjectInspector}
 */
private Object toLazyUnionObject(Object obj, ObjectInspector objectInspector) {
  if (obj == null) {
    return null;
  }

  final boolean isLazyUnionInspector = objectInspector instanceof LazyUnionObjectInspector;
  if (!isLazyUnionInspector) {
    throw new IllegalArgumentException("Invalid objectinspector found. Expected LazyUnionObjectInspector, Found " + objectInspector.getClass());
  }

  final StandardUnion source = (StandardUnion) obj;
  final LazyUnionObjectInspector unionInspector = (LazyUnionObjectInspector) objectInspector;

  // The tag selects which of the union's member inspectors applies to the field.
  final byte tag = source.getTag();
  final Object rawField = source.getObject();
  final ObjectInspector rawFieldInspector = unionInspector.getObjectInspectors().get(tag);

  // A null field is never handed to toLazyObject; it simply yields a null result.
  final Object lazyField = (rawField == null) ? null : toLazyObject(rawField, rawFieldInspector);

  return (lazyField == null) ? null : new LazyUnion(unionInspector, tag, lazyField);
}
Use of org.apache.hadoop.hive.serde2.objectinspector.StandardUnionObjectInspector.StandardUnion in the Apache Hive project.
Class BinarySortableSerDe, method deserialize.
/**
 * Reads one value of the given type from {@code buffer} and returns it as an object.
 *
 * <p>A single marker byte is read first: if it equals {@code nullMarker} the result is
 * {@code null}; otherwise the value is decoded according to {@code type.getCategory()}.
 * Primitives are returned as writable objects, LIST and STRUCT as {@code ArrayList<Object>},
 * MAP as {@code HashMap<Object, Object>} and UNION as {@code StandardUnion}. Nested element,
 * key, value, field and union-member values are decoded by recursive calls.
 *
 * <p>NOTE(review): {@code invert} is forwarded unchanged to nearly every
 * {@code buffer.read} call (the DECIMAL digits are the exception); presumably it tells the
 * buffer to undo a bitwise inversion applied for descending sort order - confirm against
 * InputByteBuffer.
 *
 * @param buffer source of the serialized bytes, positioned at the value's marker byte
 * @param type type of the value to decode
 * @param invert flag passed through to {@code buffer.read}
 * @param nullMarker marker byte value that denotes a null
 * @param notNullMarker marker byte value expected for a non-null; only checked by an assertion
 * @param reuse object from an earlier call to refill instead of allocating, or {@code null};
 *        it is cast to the concrete type of the decoded category. It is not consulted for
 *        BINARY, and map keys/values and the union's contained object are always decoded
 *        with a {@code null} reuse object
 * @return the decoded object, or {@code null} for a null marker or a VOID primitive
 * @throws IOException declared by this method; presumably propagated from the buffer reads -
 *         confirm against InputByteBuffer
 * @throws RuntimeException if the category or primitive category is not handled below
 */
static Object deserialize(InputByteBuffer buffer, TypeInfo type, boolean invert, byte nullMarker, byte notNullMarker, Object reuse) throws IOException {
// Is this field a null?
byte isNull = buffer.read(invert);
if (isNull == nullMarker) {
return null;
}
// Only verified when assertions are enabled; any non-null marker is otherwise accepted.
assert (isNull == notNullMarker);
switch(type.getCategory()) {
case PRIMITIVE:
{
// Every inner case below returns or throws, so control never falls through to LIST.
PrimitiveTypeInfo ptype = (PrimitiveTypeInfo) type;
switch(ptype.getPrimitiveCategory()) {
case VOID:
{
// No payload is read for VOID.
return null;
}
case BOOLEAN:
{
BooleanWritable r = reuse == null ? new BooleanWritable() : (BooleanWritable) reuse;
// One payload byte: 2 decodes to true, anything else to false (1 is the expected false).
byte b = buffer.read(invert);
assert (b == 1 || b == 2);
r.set(b == 2);
return r;
}
case BYTE:
{
ByteWritable r = reuse == null ? new ByteWritable() : (ByteWritable) reuse;
// The stored byte has its top bit flipped; XOR with 0x80 restores it.
r.set((byte) (buffer.read(invert) ^ 0x80));
return r;
}
case SHORT:
{
ShortWritable r = reuse == null ? new ShortWritable() : (ShortWritable) reuse;
// High byte first (top bit flipped back), then the low byte as the lower 8 bits.
int v = buffer.read(invert) ^ 0x80;
v = (v << 8) + (buffer.read(invert) & 0xff);
r.set((short) v);
return r;
}
case INT:
{
IntWritable r = reuse == null ? new IntWritable() : (IntWritable) reuse;
r.set(deserializeInt(buffer, invert));
return r;
}
case LONG:
{
LongWritable r = reuse == null ? new LongWritable() : (LongWritable) reuse;
r.set(deserializeLong(buffer, invert));
return r;
}
case FLOAT:
{
FloatWritable r = reuse == null ? new FloatWritable() : (FloatWritable) reuse;
// Assemble the 4 payload bytes, most significant first, into the encoded bit pattern.
int v = 0;
for (int i = 0; i < 4; i++) {
v = (v << 8) + (buffer.read(invert) & 0xff);
}
// A clear top bit in the encoded form marks a negative value.
if ((v & (1 << 31)) == 0) {
// negative number, flip all bits
v = ~v;
} else {
// positive number, flip the first bit
v = v ^ (1 << 31);
}
r.set(Float.intBitsToFloat(v));
return r;
}
case DOUBLE:
{
DoubleWritable r = reuse == null ? new DoubleWritable() : (DoubleWritable) reuse;
// Same scheme as FLOAT, with 8 payload bytes and a 64-bit pattern.
long v = 0;
for (int i = 0; i < 8; i++) {
v = (v << 8) + (buffer.read(invert) & 0xff);
}
if ((v & (1L << 63)) == 0) {
// negative number, flip all bits
v = ~v;
} else {
// positive number, flip the first bit
v = v ^ (1L << 63);
}
r.set(Double.longBitsToDouble(v));
return r;
}
case STRING:
{
Text r = reuse == null ? new Text() : (Text) reuse;
return deserializeText(buffer, invert, r);
}
case CHAR:
{
HiveCharWritable r = reuse == null ? new HiveCharWritable() : (HiveCharWritable) reuse;
// Use internal text member to read value
deserializeText(buffer, invert, r.getTextValue());
r.enforceMaxLength(getCharacterMaxLength(type));
return r;
}
case VARCHAR:
{
HiveVarcharWritable r = reuse == null ? new HiveVarcharWritable() : (HiveVarcharWritable) reuse;
// Use HiveVarchar's internal Text member to read the value.
deserializeText(buffer, invert, r.getTextValue());
// If we cache helper data for deserialization we could avoid having
// to call getVarcharMaxLength() on every deserialize call.
r.enforceMaxLength(getCharacterMaxLength(type));
return r;
}
case BINARY:
{
// Note: reuse is not consulted for BINARY; a fresh BytesWritable is always allocated.
BytesWritable bw = new BytesWritable();
// Get the actual length first
// First pass: count decoded bytes up to the 0 terminator; an escape byte (1) plus the
// byte that follows it count as a single decoded byte.
int start = buffer.tell();
int length = 0;
do {
byte b = buffer.read(invert);
if (b == 0) {
// end of string
break;
}
if (b == 1) {
// the last char is an escape char. read the actual char
buffer.read(invert);
}
length++;
} while (true);
// NOTE(review): the loop above has already consumed the terminator byte, so if tell()
// advances by one per read, the consumed span is at least length + 1 and this equality
// looks like it can never hold - confirm against InputByteBuffer.
if (length == buffer.tell() - start) {
// No escaping happened, so we are already done.
bw.set(buffer.getData(), start, length);
} else {
// Escaping happened, we need to copy byte-by-byte.
// 1. Set the length first.
// (This copies the raw bytes as a side effect; they are overwritten in step 3.)
bw.set(buffer.getData(), start, length);
// 2. Reset the pointer.
buffer.seek(start);
// 3. Copy the data.
byte[] rdata = bw.getBytes();
for (int i = 0; i < length; i++) {
byte b = buffer.read(invert);
if (b == 1) {
// The last char is an escape char, read the actual char.
// The serialization format escape \0 to \1, and \1 to \2,
// to make sure the string is null-terminated.
b = (byte) (buffer.read(invert) - 1);
}
rdata[i] = b;
}
// 4. Read the null terminator.
byte b = buffer.read(invert);
assert (b == 0);
}
return bw;
}
case DATE:
{
DateWritable d = reuse == null ? new DateWritable() : (DateWritable) reuse;
d.set(deserializeInt(buffer, invert));
return d;
}
case TIMESTAMP:
// This case has no braces, so t and bytes are declared in the enclosing switch scope.
TimestampWritable t = (reuse == null ? new TimestampWritable() : (TimestampWritable) reuse);
// Fixed-width payload of TimestampWritable.BINARY_SORTABLE_LENGTH bytes.
byte[] bytes = new byte[TimestampWritable.BINARY_SORTABLE_LENGTH];
for (int i = 0; i < bytes.length; i++) {
bytes[i] = buffer.read(invert);
}
t.setBinarySortable(bytes, 0);
return t;
case INTERVAL_YEAR_MONTH:
{
HiveIntervalYearMonthWritable i = reuse == null ? new HiveIntervalYearMonthWritable() : (HiveIntervalYearMonthWritable) reuse;
i.set(deserializeInt(buffer, invert));
return i;
}
case INTERVAL_DAY_TIME:
{
HiveIntervalDayTimeWritable i = reuse == null ? new HiveIntervalDayTimeWritable() : (HiveIntervalDayTimeWritable) reuse;
// Read order matters: the long seconds value comes first, then the int nanos.
long totalSecs = deserializeLong(buffer, invert);
int nanos = deserializeInt(buffer, invert);
i.set(totalSecs, nanos);
return i;
}
case DECIMAL:
{
// See serialization of decimal for explanation (below)
HiveDecimalWritable bdw = (reuse == null ? new HiveDecimalWritable() : (HiveDecimalWritable) reuse);
// Sign byte minus one gives -1, 0 or 1; only -1 is treated as negative.
int b = buffer.read(invert) - 1;
assert (b == 1 || b == -1 || b == 0);
boolean positive = b != -1;
// 4-byte factor, most significant byte first, with the top bit of the first byte flipped.
int factor = buffer.read(invert) ^ 0x80;
for (int i = 0; i < 3; i++) {
factor = (factor << 8) + (buffer.read(invert) & 0xff);
}
if (!positive) {
factor = -factor;
}
// First pass: count digit bytes up to the 0 terminator. For negative values the
// digits are read with the opposite inversion flag.
int start = buffer.tell();
int length = 0;
do {
b = buffer.read(positive ? invert : !invert);
assert (b != 1);
if (b == 0) {
// end of digits
break;
}
length++;
} while (true);
// Second pass: rewind and copy exactly length digit bytes.
final byte[] decimalBuffer = new byte[length];
buffer.seek(start);
for (int i = 0; i < length; ++i) {
decimalBuffer[i] = buffer.read(positive ? invert : !invert);
}
// read the null byte again
buffer.read(positive ? invert : !invert);
// The digit bytes are decoded as text and parsed as an unscaled integer, which is then
// scaled by 10^(factor - length); the sign is applied last.
String digits = new String(decimalBuffer, 0, length, decimalCharSet);
BigInteger bi = new BigInteger(digits);
HiveDecimal bd = HiveDecimal.create(bi).scaleByPowerOfTen(factor - length);
if (!positive) {
bd = bd.negate();
}
bdw.set(bd);
return bdw;
}
default:
{
throw new RuntimeException("Unrecognized type: " + ptype.getPrimitiveCategory());
}
}
}
case LIST:
{
ListTypeInfo ltype = (ListTypeInfo) type;
TypeInfo etype = ltype.getListElementTypeInfo();
// Create the list if needed
ArrayList<Object> r = reuse == null ? new ArrayList<Object>() : (ArrayList<Object>) reuse;
// Read the list
int size = 0;
while (true) {
int more = buffer.read(invert);
if (more == 0) {
// \0 to terminate
break;
}
// \1 followed by each element
assert (more == 1);
// Grow the list by one slot when needed; an existing element at this index is
// handed to the recursive call as its reuse object.
if (size == r.size()) {
r.add(null);
}
r.set(size, deserialize(buffer, etype, invert, nullMarker, notNullMarker, r.get(size)));
size++;
}
// Remove additional elements if the list is reused
while (r.size() > size) {
r.remove(r.size() - 1);
}
return r;
}
case MAP:
{
MapTypeInfo mtype = (MapTypeInfo) type;
TypeInfo ktype = mtype.getMapKeyTypeInfo();
TypeInfo vtype = mtype.getMapValueTypeInfo();
// Create the map if needed
Map<Object, Object> r;
if (reuse == null) {
r = new HashMap<Object, Object>();
} else {
// A reused map is emptied first; its previous keys and values are not reused.
r = (HashMap<Object, Object>) reuse;
r.clear();
}
while (true) {
int more = buffer.read(invert);
if (more == 0) {
// \0 to terminate
break;
}
// \1 followed by each key and then each value
assert (more == 1);
Object k = deserialize(buffer, ktype, invert, nullMarker, notNullMarker, null);
Object v = deserialize(buffer, vtype, invert, nullMarker, notNullMarker, null);
r.put(k, v);
}
return r;
}
case STRUCT:
{
StructTypeInfo stype = (StructTypeInfo) type;
List<TypeInfo> fieldTypes = stype.getAllStructFieldTypeInfos();
int size = fieldTypes.size();
// Create the struct if needed
ArrayList<Object> r = reuse == null ? new ArrayList<Object>(size) : (ArrayList<Object>) reuse;
// A reused list longer than the field count is only caught when assertions are enabled.
assert (r.size() <= size);
// Set the size of the struct
while (r.size() < size) {
r.add(null);
}
// Read one field by one field
// Fields have no per-field separator here; each is read in declaration order and the
// previous value at that index is offered for reuse.
for (int eid = 0; eid < size; eid++) {
r.set(eid, deserialize(buffer, fieldTypes.get(eid), invert, nullMarker, notNullMarker, r.get(eid)));
}
return r;
}
case UNION:
{
UnionTypeInfo utype = (UnionTypeInfo) type;
StandardUnion r = reuse == null ? new StandardUnion() : (StandardUnion) reuse;
// Read the tag
// The tag byte is used directly as the index into the union's member types; it is not
// range-checked here.
byte tag = buffer.read(invert);
r.setTag(tag);
// The contained object is always freshly decoded (null reuse object).
r.setObject(deserialize(buffer, utype.getAllUnionObjectTypeInfos().get(tag), invert, nullMarker, notNullMarker, null));
return r;
}
default:
{
throw new RuntimeException("Unrecognized type: " + type.getCategory());
}
}
}
Use of org.apache.hadoop.hive.serde2.objectinspector.StandardUnionObjectInspector.StandardUnion in the Apache Hive project.
Class TestStandardObjectInspectors, method testStandardUnionObjectInspector.
/**
 * Exercises a standard union object inspector built over six member inspectors
 * (int, string, boolean, list, map, struct): equality of two inspectors built from equal
 * member lists, compare support, metadata, type-info round trips, null handling, and
 * tag/field/JSON/compare/copy behaviour for a union value of each member type.
 *
 * @throws Throwable any failure, rethrown after its stack trace has been printed
 */
@SuppressWarnings("unchecked")
public void testStandardUnionObjectInspector() throws Throwable {
  try {
    ArrayList<ObjectInspector> memberOIs = new ArrayList<ObjectInspector>();

    // Tags 0-2: primitive members.
    memberOIs.add(PrimitiveObjectInspectorFactory.javaIntObjectInspector);
    memberOIs.add(PrimitiveObjectInspectorFactory.javaStringObjectInspector);
    memberOIs.add(PrimitiveObjectInspectorFactory.javaBooleanObjectInspector);

    // Tag 3: list of int.
    memberOIs.add(ObjectInspectorFactory.getStandardListObjectInspector(PrimitiveObjectInspectorFactory.javaIntObjectInspector));

    // Tag 4: map of int to string.
    memberOIs.add(ObjectInspectorFactory.getStandardMapObjectInspector(PrimitiveObjectInspectorFactory.javaIntObjectInspector, PrimitiveObjectInspectorFactory.javaStringObjectInspector));

    // Tag 5: struct with a double field and a long field.
    List<String> structFieldNames = new ArrayList<String>();
    structFieldNames.add("myDouble");
    structFieldNames.add("myLong");
    ArrayList<ObjectInspector> structFieldOIs = new ArrayList<ObjectInspector>();
    structFieldOIs.add(PrimitiveObjectInspectorFactory.javaDoubleObjectInspector);
    structFieldOIs.add(PrimitiveObjectInspectorFactory.javaLongObjectInspector);
    memberOIs.add(ObjectInspectorFactory.getStandardStructObjectInspector(structFieldNames, structFieldOIs));

    // Two inspectors built from equal member lists must be interchangeable.
    StandardUnionObjectInspector unionOI = ObjectInspectorFactory.getStandardUnionObjectInspector(memberOIs);
    StandardUnionObjectInspector twinOI = ObjectInspectorFactory.getStandardUnionObjectInspector((ArrayList<ObjectInspector>) memberOIs.clone());
    assertEquals(unionOI, twinOI);
    assertEquals(ObjectInspectorUtils.getObjectInspectorName(unionOI), ObjectInspectorUtils.getObjectInspectorName(twinOI));
    assertTrue(ObjectInspectorUtils.compareTypes(unionOI, twinOI));

    // A union that may hold a map is not comparable...
    assertFalse(ObjectInspectorUtils.compareSupported(unionOI));

    // ...but replacing the map member with an int member makes it comparable.
    ArrayList<ObjectInspector> membersWithoutMap = (ArrayList<ObjectInspector>) memberOIs.clone();
    membersWithoutMap.set(4, PrimitiveObjectInspectorFactory.javaIntObjectInspector);
    assertTrue(ObjectInspectorUtils.compareSupported(ObjectInspectorFactory.getStandardUnionObjectInspector(membersWithoutMap)));

    // Metadata.
    assertEquals(Category.UNION, unionOI.getCategory());
    List<? extends ObjectInspector> reportedOIs = unionOI.getObjectInspectors();
    assertEquals(6, reportedOIs.size());
    for (int tag = 0; tag < 6; tag++) {
      assertEquals(memberOIs.get(tag), reportedOIs.get(tag));
    }

    // Expected type name: member type names, comma separated, inside "uniontype<...>".
    StringBuilder expectedTypeName = new StringBuilder("uniontype<");
    String separator = "";
    for (ObjectInspector member : reportedOIs) {
      expectedTypeName.append(separator);
      expectedTypeName.append(member.getTypeName());
      separator = ",";
    }
    expectedTypeName.append(">");
    assertEquals(expectedTypeName.toString(), unionOI.getTypeName());

    // TypeInfo round trips.
    TypeInfo unionTypeInfo = TypeInfoUtils.getTypeInfoFromObjectInspector(unionOI);
    assertEquals(Category.UNION, unionTypeInfo.getCategory());
    assertEquals(UnionTypeInfo.class.getName(), unionTypeInfo.getClass().getName());
    assertEquals(unionTypeInfo.getTypeName(), unionOI.getTypeName());
    assertEquals(unionTypeInfo, TypeInfoUtils.getTypeInfoFromTypeString(unionOI.getTypeName()));
    TypeInfo twinTypeInfo = TypeInfoUtils.getTypeInfoFromObjectInspector(twinOI);
    assertEquals(unionTypeInfo, twinTypeInfo);
    assertEquals(TypeInfoUtils.getStandardJavaObjectInspectorFromTypeInfo(unionTypeInfo), TypeInfoUtils.getStandardJavaObjectInspectorFromTypeInfo(twinTypeInfo));
    assertEquals(TypeInfoUtils.getStandardWritableObjectInspectorFromTypeInfo(unionTypeInfo), TypeInfoUtils.getStandardWritableObjectInspectorFromTypeInfo(twinTypeInfo));

    // A null union has no field and reports tag -1.
    assertNull(unionOI.getField(null));
    assertEquals(-1, unionOI.getTag(null));

    // Tag 0: int.
    UnionObject sample = new StandardUnion((byte) 0, 1);
    assertEquals(0, unionOI.getTag(sample));
    assertEquals(1, unionOI.getField(sample));
    assertEquals("{0:1}", SerDeUtils.getJSONString(sample, unionOI));
    assertEquals(0, ObjectInspectorUtils.compare(sample, unionOI, new StandardUnion((byte) 0, 1), twinOI));
    assertTrue(ObjectInspectorUtils.copyToStandardObject(sample, unionOI).equals(1));

    // Tag 1: string.
    sample = new StandardUnion((byte) 1, "two");
    assertEquals(1, unionOI.getTag(sample));
    assertEquals("two", unionOI.getField(sample));
    assertEquals("{1:\"two\"}", SerDeUtils.getJSONString(sample, unionOI));
    assertEquals(0, ObjectInspectorUtils.compare(sample, unionOI, new StandardUnion((byte) 1, "two"), twinOI));
    assertTrue(ObjectInspectorUtils.copyToStandardObject(sample, unionOI).equals("two"));

    // Tag 2: boolean.
    sample = new StandardUnion((byte) 2, true);
    assertEquals(2, unionOI.getTag(sample));
    assertEquals(true, unionOI.getField(sample));
    assertEquals("{2:true}", SerDeUtils.getJSONString(sample, unionOI));
    assertEquals(0, ObjectInspectorUtils.compare(sample, unionOI, new StandardUnion((byte) 2, true), twinOI));
    assertTrue(ObjectInspectorUtils.copyToStandardObject(sample, unionOI).equals(true));

    // Tag 3: list of int.
    ArrayList<Integer> numbers = new ArrayList<Integer>();
    numbers.add(4);
    numbers.add(5);
    sample = new StandardUnion((byte) 3, numbers);
    assertEquals(3, unionOI.getTag(sample));
    assertEquals(numbers, unionOI.getField(sample));
    assertEquals("{3:[4,5]}", SerDeUtils.getJSONString(sample, unionOI));
    assertEquals(0, ObjectInspectorUtils.compare(sample, unionOI, new StandardUnion((byte) 3, numbers.clone()), twinOI));
    assertTrue(ObjectInspectorUtils.copyToStandardObject(sample, unionOI).equals(numbers));

    // Tag 4: map; comparing is expected to fail with a specific message.
    HashMap<Integer, String> numberNames = new HashMap<Integer, String>();
    numberNames.put(6, "six");
    numberNames.put(7, "seven");
    numberNames.put(8, "eight");
    sample = new StandardUnion((byte) 4, numberNames);
    assertEquals(4, unionOI.getTag(sample));
    assertEquals(numberNames, unionOI.getField(sample));
    assertEquals("{4:{6:\"six\",7:\"seven\",8:\"eight\"}}", SerDeUtils.getJSONString(sample, unionOI));
    Throwable compareFailure = null;
    try {
      ObjectInspectorUtils.compare(sample, unionOI, new StandardUnion((byte) 4, numberNames.clone()), twinOI, null);
    } catch (Throwable caught) {
      compareFailure = caught;
    }
    assertNotNull(compareFailure);
    assertEquals("Compare on map type not supported!", compareFailure.getMessage());
    assertTrue(ObjectInspectorUtils.copyToStandardObject(sample, unionOI).equals(numberNames));

    // Tag 5: struct.
    ArrayList<Object> structValues = new ArrayList<Object>(2);
    structValues.add(9.0);
    structValues.add(10L);
    sample = new StandardUnion((byte) 5, structValues);
    assertEquals(5, unionOI.getTag(sample));
    assertEquals(structValues, unionOI.getField(sample));
    assertEquals("{5:{\"mydouble\":9.0,\"mylong\":10}}", SerDeUtils.getJSONString(sample, unionOI));
    assertEquals(0, ObjectInspectorUtils.compare(sample, unionOI, new StandardUnion((byte) 5, structValues.clone()), twinOI));
    assertTrue(ObjectInspectorUtils.copyToStandardObject(sample, unionOI).equals(structValues));
  } catch (Throwable failure) {
    // Print the trace before rethrowing so the failure is visible in the test output.
    failure.printStackTrace();
    throw failure;
  }
}
Use of org.apache.hadoop.hive.serde2.objectinspector.StandardUnionObjectInspector.StandardUnion in the Apache Hive project.
Class ReduceSinkOperator, method populateCachedDistinctKeys.
/**
 * Fills the distinct-key slot of {@code cachedKeys[index]} for one row.
 *
 * <p>A new {@link StandardUnion} tagged with {@code index} is stored at position
 * {@code numDistributionKeys}; its object is an array holding the evaluated value of each
 * key column listed in {@code distinctColIndices.get(index)}.
 *
 * @param row the row whose key expressions are evaluated
 * @param index the {@code cachedKeys} entry to write, also used as the union tag
 * @throws HiveException declared by this method; presumably raised by key evaluation
 */
private void populateCachedDistinctKeys(Object row, int index) throws HiveException {
  final byte unionTag = (byte) index;
  final StandardUnion distinctKey = new StandardUnion(unionTag, new Object[distinctColIndices.get(index).size()]);
  cachedKeys[index][numDistributionKeys] = distinctKey;

  // Evaluate each distinct column straight into the array held by the union.
  final Object[] slots = (Object[]) distinctKey.getObject();
  int slot = 0;
  while (slot < slots.length) {
    slots[slot] = keyEval[distinctColIndices.get(index).get(slot)].evaluate(row);
    slot++;
  }

  distinctKey.setTag(unionTag);
}
Aggregations