Use of org.apache.hadoop.hive.serde2.lazy.LazyArray in project hive by apache.
In the class TestLazyArrayMapStruct, method testNestedinArrayAtLevel.
/**
* Tests LazySimpleSerDe parsing of a column type with the given number of nesting levels,
* where the innermost collection type is chosen by dtype.
*
* @param nestingLevel number of levels of nesting in the column type
* @param dtype category of the innermost type (LIST, MAP, STRUCT or UNION)
* @param tableProp table properties used to initialize the SerDe
* @throws SerDeException
*/
private void testNestedinArrayAtLevel(int nestingLevel, ObjectInspector.Category dtype, Properties tableProp) throws SerDeException {
//create type with nestingLevel levels of nesting
//set inner schema for dtype
String inSchema = null;
switch(dtype) {
case LIST:
inSchema = "array<tinyint>";
break;
case MAP:
inSchema = "map<string,int>";
break;
case STRUCT:
inSchema = "struct<s:string,i:tinyint>";
break;
case UNION:
inSchema = "uniontype<string,tinyint>";
break;
default:
fail("type not supported by test case");
}
StringBuilder schema = new StringBuilder(inSchema);
for (int i = 0; i < nestingLevel - 1; i++) {
schema.insert(0, "array<");
schema.append(">");
}
System.err.println("Testing nesting level " + nestingLevel + ". Using schema " + schema);
// Create the SerDe
LazySimpleSerDe serDe = new LazySimpleSerDe();
Configuration conf = new Configuration();
tableProp.setProperty("columns", "narray");
tableProp.setProperty("columns.types", schema.toString());
SerDeUtils.initializeSerDe(serDe, conf, tableProp, null);
LazySerDeParameters serdeParams = new LazySerDeParameters(conf, tableProp, LazySimpleSerDe.class.getName());
//create the serialized string for type
byte[] separators = serdeParams.getSeparators();
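//separators[i] is the delimiter used at nesting depth i; the innermost collection's elements are separated by separators[nestingLevel]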
System.err.println("Using separator " + (char) separators[nestingLevel]);
byte[] serializedRow = null;
switch(dtype) {
case LIST:
serializedRow = new byte[] { '8', separators[nestingLevel], '9' };
break;
case MAP:
byte kvSep = separators[nestingLevel + 1];
byte kvPairSep = separators[nestingLevel];
serializedRow = new byte[] { '1', kvSep, '1', kvPairSep, '2', kvSep, '2' };
break;
case STRUCT:
serializedRow = new byte[] { '8', separators[nestingLevel], '9' };
break;
case UNION:
serializedRow = new byte[] { '0', separators[nestingLevel], '9' };
break;
default:
fail("type not supported by test case");
}
//create LazyStruct with serialized string with expected separators
StructObjectInspector oi = (StructObjectInspector) serDe.getObjectInspector();
LazyStruct struct = (LazyStruct) LazyFactory.createLazyObject(oi);
TestLazyPrimitive.initLazyObject(struct, serializedRow, 0, serializedRow.length);
//Get fields out of the lazy struct and check if they match expected
// results
//Get first level array
LazyArray array = (LazyArray) struct.getField(0);
//Peel off the outer array levels so that array directly contains the innermost dtype element
for (int i = 0; i < nestingLevel - 2; i++) {
array = (LazyArray) array.getListElementObject(0);
}
//verify the serialized format for dtype
switch(dtype) {
case LIST:
LazyArray array1 = (LazyArray) array.getListElementObject(0);
//check elements of the innermost array
assertEquals(2, array1.getListLength());
assertEquals(new ByteWritable((byte) 8), ((LazyByte) array1.getListElementObject(0)).getWritableObject());
assertEquals(new ByteWritable((byte) 9), ((LazyByte) array1.getListElementObject(1)).getWritableObject());
break;
case MAP:
LazyMap lazyMap = (LazyMap) array.getListElementObject(0);
Map map = lazyMap.getMap();
System.err.println(map);
assertEquals(2, map.size());
Iterator<Map.Entry<LazyString, LazyInteger>> it = map.entrySet().iterator();
Entry<LazyString, LazyInteger> e1 = it.next();
assertEquals(e1.getKey().getWritableObject(), new Text(new byte[] { '1' }));
assertEquals(e1.getValue().getWritableObject(), new IntWritable(1));
Entry<LazyString, LazyInteger> e2 = it.next();
assertEquals(e2.getKey().getWritableObject(), new Text(new byte[] { '2' }));
assertEquals(e2.getValue().getWritableObject(), new IntWritable(2));
break;
case STRUCT:
LazyStruct innerStruct = (LazyStruct) array.getListElementObject(0);
//check elements of the innermost struct
assertEquals(2, innerStruct.getFieldsAsList().size());
assertEquals(new Text(new byte[] { '8' }), ((LazyString) innerStruct.getField(0)).getWritableObject());
assertEquals(new ByteWritable((byte) 9), ((LazyByte) innerStruct.getField(1)).getWritableObject());
break;
case UNION:
LazyUnion lazyUnion = (LazyUnion) array.getListElementObject(0);
//check elements of the innermost union
assertEquals(new Text(new byte[] { '9' }), ((LazyString) lazyUnion.getField()).getWritableObject());
break;
default:
fail("type not supported by test case");
}
//test serialization
Text serializedText = (Text) serDe.serialize(struct.getObject(), serDe.getObjectInspector());
org.junit.Assert.assertArrayEquals(serializedRow, serializedText.getBytes());
}
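For orientation, here is a minimal, self-contained sketch of the same LazySimpleSerDe setup outside the test harness. The two-level array<array<tinyint>> schema is an illustrative assumption; the column name narray and all API calls are taken from the test above.

import java.util.Properties;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.serde2.SerDeUtils;
import org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;

public class NestedArraySchemaSketch {
  public static void main(String[] args) throws Exception {
    // Build the nested type string the same way the test does: start from the
    // innermost type and wrap it in array<...> once per extra nesting level.
    StringBuilder schema = new StringBuilder("array<tinyint>");
    schema.insert(0, "array<").append(">"); // -> array<array<tinyint>>

    Properties tableProp = new Properties();
    tableProp.setProperty("columns", "narray");
    tableProp.setProperty("columns.types", schema.toString());

    LazySimpleSerDe serDe = new LazySimpleSerDe();
    SerDeUtils.initializeSerDe(serDe, new Configuration(), tableProp, null);

    // The SerDe's object inspector now reflects the nested column type.
    StructObjectInspector oi = (StructObjectInspector) serDe.getObjectInspector();
    System.out.println(oi.getTypeName());
  }
}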
Use of org.apache.hadoop.hive.serde2.lazy.LazyArray in project hive by apache.
In the class TestLazyArrayMapStruct, method testLazyArray.
/**
* Test the LazyArray class.
*/
public void testLazyArray() throws Throwable {
try {
// Array of Byte
Text nullSequence = new Text("\\N");
ObjectInspector oi = LazyFactory.createLazyObjectInspector(TypeInfoUtils.getTypeInfosFromTypeString("array<tinyint>").get(0), new byte[] { (byte) 1 }, 0, nullSequence, false, (byte) 0);
LazyArray b = (LazyArray) LazyFactory.createLazyObject(oi);
byte[] data = new byte[] { '-', '1', 1, '\\', 'N', 1, '8' };
TestLazyPrimitive.initLazyObject(b, data, 0, data.length);
assertNull(b.getListElementObject(-1));
assertEquals(new ByteWritable((byte) -1), ((LazyByte) b.getListElementObject(0)).getWritableObject());
assertEquals(new ByteWritable((byte) -1), ((LazyByte) b.getList().get(0)).getWritableObject());
assertNull(b.getListElementObject(1));
assertNull(b.getList().get(1));
assertEquals(new ByteWritable((byte) 8), ((LazyByte) b.getListElementObject(2)).getWritableObject());
assertEquals(new ByteWritable((byte) 8), ((LazyByte) b.getList().get(2)).getWritableObject());
assertNull(b.getListElementObject(3));
assertEquals(3, b.getList().size());
// Array of String
oi = LazyFactory.createLazyObjectInspector(TypeInfoUtils.getTypeInfosFromTypeString("array<string>").get(0), new byte[] { (byte) '\t' }, 0, nullSequence, false, (byte) 0);
b = (LazyArray) LazyFactory.createLazyObject(oi);
data = new byte[] { 'a', 'b', '\t', 'c', '\t', '\\', 'N', '\t', '\t', 'd' };
// Note: the first and last element of the byte[] are NOT used
TestLazyPrimitive.initLazyObject(b, data, 1, data.length - 2);
assertNull(b.getListElementObject(-1));
assertEquals(new Text("b"), ((LazyString) b.getListElementObject(0)).getWritableObject());
assertEquals(new Text("b"), ((LazyString) b.getList().get(0)).getWritableObject());
assertEquals(new Text("c"), ((LazyString) b.getListElementObject(1)).getWritableObject());
assertEquals(new Text("c"), ((LazyString) b.getList().get(1)).getWritableObject());
assertNull((b.getListElementObject(2)));
assertNull((b.getList().get(2)));
assertEquals(new Text(""), ((LazyString) b.getListElementObject(3)).getWritableObject());
assertEquals(new Text(""), ((LazyString) b.getList().get(3)).getWritableObject());
assertEquals(new Text(""), ((LazyString) b.getListElementObject(4)).getWritableObject());
assertEquals(new Text(""), ((LazyString) b.getList().get(4)).getWritableObject());
assertNull((b.getListElementObject(5)));
assertEquals(5, b.getList().size());
// -- HIVE-4149
b = (LazyArray) LazyFactory.createLazyObject(oi);
data = new byte[] { 'a', '\t', '\\', 'N' };
TestLazyPrimitive.initLazyObject(b, data, 0, data.length);
assertEquals(new Text("a"), ((LazyString) b.getListElementObject(0)).getWritableObject());
assertNull(b.getListElementObject(1));
data = new byte[] { '\\', 'N', '\t', 'a' };
TestLazyPrimitive.initLazyObject(b, data, 0, data.length);
assertNull(b.getListElementObject(0));
// access the same element twice (the second call returns the cached value)
assertNull(b.getListElementObject(0));
assertEquals(new Text("a"), ((LazyString) b.getListElementObject(1)).getWritableObject());
} catch (Throwable e) {
e.printStackTrace();
throw e;
}
}
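Outside of the test helper, a LazyArray is bound to its raw bytes through init with a ByteArrayRef, which is what TestLazyPrimitive.initLazyObject wraps. A hedged, self-contained sketch, assuming the org.apache.hadoop.hive.serde2.lazy.ByteArrayRef / init(ref, start, length) entry point and reusing the array<string> inspector parameters from the test; the sample data is illustrative.

import org.apache.hadoop.hive.serde2.lazy.ByteArrayRef;
import org.apache.hadoop.hive.serde2.lazy.LazyArray;
import org.apache.hadoop.hive.serde2.lazy.LazyFactory;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
import org.apache.hadoop.io.Text;

public class LazyArrayReadSketch {
  public static void main(String[] args) {
    // Lazy object inspector for array<string>: '\t' separates elements, "\N" is the null sequence.
    ObjectInspector oi = LazyFactory.createLazyObjectInspector(
        TypeInfoUtils.getTypeInfosFromTypeString("array<string>").get(0),
        new byte[] { (byte) '\t' }, 0, new Text("\\N"), false, (byte) 0);
    LazyArray array = (LazyArray) LazyFactory.createLazyObject(oi);

    // Bind the raw bytes; elements are parsed lazily on first access.
    byte[] data = "x\ty\t\\N".getBytes();
    ByteArrayRef ref = new ByteArrayRef();
    ref.setData(data);
    array.init(ref, 0, data.length);

    System.out.println(array.getListLength());          // 3
    System.out.println(array.getListElementObject(2));  // null, matches the null sequence
  }
}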
Use of org.apache.hadoop.hive.serde2.lazy.LazyArray in project hive by apache.
In the class TestAccumuloSerDe, method testArraySerialization.
@Test
public void testArraySerialization() throws Exception {
Properties properties = new Properties();
Configuration conf = new Configuration();
properties.setProperty(AccumuloSerDeParameters.COLUMN_MAPPINGS, ":rowID,cf:vals");
properties.setProperty(serdeConstants.LIST_COLUMNS, "row,values");
properties.setProperty(serdeConstants.LIST_COLUMN_TYPES, "string,array<string>");
properties.setProperty(serdeConstants.COLLECTION_DELIM, ":");
// Use the same separator that was configured as the collection delimiter above
char separator = ':';
serde.initialize(conf, properties);
AccumuloHiveRow row = new AccumuloHiveRow();
row.setRowId("r1");
row.add("cf", "vals", ("value1" + separator + "value2" + separator + "value3").getBytes());
Object obj = serde.deserialize(row);
assertNotNull(obj);
assertTrue(obj instanceof LazyAccumuloRow);
LazyAccumuloRow lazyRow = (LazyAccumuloRow) obj;
Object field0 = lazyRow.getField(0);
assertNotNull(field0);
assertTrue(field0 instanceof LazyString);
assertEquals(row.getRowId(), ((LazyString) field0).getWritableObject().toString());
Object field1 = lazyRow.getField(1);
assertNotNull(field1);
assertTrue(field1 instanceof LazyArray);
LazyArray array = (LazyArray) field1;
List<Object> values = array.getList();
assertEquals(3, values.size());
for (int i = 0; i < 3; i++) {
Object o = values.get(i);
assertNotNull(o);
assertTrue(o instanceof LazyString);
assertEquals("value" + (i + 1), ((LazyString) o).getWritableObject().toString());
}
}
Use of org.apache.hadoop.hive.serde2.lazy.LazyArray in project hive by apache.
In the class AvroLazyObjectInspector, method toLazyListObject.
/**
* Convert the given object to a lazy list object using the given {@link ObjectInspector}
*
* @param obj Object to be converted to a {@link LazyObject}
* @param objectInspector ObjectInspector used for the conversion
* @return the created {@link LazyObject lazy object}
*/
private Object toLazyListObject(Object obj, ObjectInspector objectInspector) {
if (obj == null) {
return null;
}
List<?> listObj = (List<?>) obj;
LazyArray retList = (LazyArray) LazyFactory.createLazyObject(objectInspector);
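// Populate the lazy array's element list directly with converted elements instead of initializing it from serialized bytes.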
List<Object> lazyList = retList.getList();
ObjectInspector listElementOI = ((ListObjectInspector) objectInspector).getListElementObjectInspector();
for (int i = 0; i < listObj.size(); i++) {
lazyList.add(toLazyObject(listObj.get(i), listElementOI));
}
return retList;
}
Use of org.apache.hadoop.hive.serde2.lazy.LazyArray in project presto by prestodb.
In the class RcFileTester, method decodeRecordReaderValue.
private static Object decodeRecordReaderValue(Type type, Object actualValue) {
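// Unwrap Hadoop writables and Hive lazy objects into the plain Java values used for comparison.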
if (actualValue instanceof LazyPrimitive) {
actualValue = ((LazyPrimitive<?, ?>) actualValue).getWritableObject();
}
if (actualValue instanceof BooleanWritable) {
actualValue = ((BooleanWritable) actualValue).get();
} else if (actualValue instanceof ByteWritable) {
actualValue = ((ByteWritable) actualValue).get();
} else if (actualValue instanceof BytesWritable) {
actualValue = new SqlVarbinary(((BytesWritable) actualValue).copyBytes());
} else if (actualValue instanceof DateWritable) {
actualValue = new SqlDate(((DateWritable) actualValue).getDays());
} else if (actualValue instanceof DoubleWritable) {
actualValue = ((DoubleWritable) actualValue).get();
} else if (actualValue instanceof FloatWritable) {
actualValue = ((FloatWritable) actualValue).get();
} else if (actualValue instanceof IntWritable) {
actualValue = ((IntWritable) actualValue).get();
} else if (actualValue instanceof LongWritable) {
actualValue = ((LongWritable) actualValue).get();
} else if (actualValue instanceof ShortWritable) {
actualValue = ((ShortWritable) actualValue).get();
} else if (actualValue instanceof HiveDecimalWritable) {
DecimalType decimalType = (DecimalType) type;
HiveDecimalWritable writable = (HiveDecimalWritable) actualValue;
// writable messes with the scale so rescale the values to the Presto type
BigInteger rescaledValue = rescale(writable.getHiveDecimal().unscaledValue(), writable.getScale(), decimalType.getScale());
actualValue = new SqlDecimal(rescaledValue, decimalType.getPrecision(), decimalType.getScale());
} else if (actualValue instanceof Text) {
actualValue = actualValue.toString();
} else if (actualValue instanceof TimestampWritable) {
TimestampWritable timestamp = (TimestampWritable) actualValue;
actualValue = new SqlTimestamp((timestamp.getSeconds() * 1000) + (timestamp.getNanos() / 1000000L), UTC_KEY);
} else if (actualValue instanceof StructObject) {
StructObject structObject = (StructObject) actualValue;
actualValue = decodeRecordReaderStruct(type, structObject.getFieldsAsList());
} else if (actualValue instanceof LazyBinaryArray) {
actualValue = decodeRecordReaderList(type, ((LazyBinaryArray) actualValue).getList());
} else if (actualValue instanceof LazyBinaryMap) {
actualValue = decodeRecordReaderMap(type, ((LazyBinaryMap) actualValue).getMap());
} else if (actualValue instanceof LazyArray) {
actualValue = decodeRecordReaderList(type, ((LazyArray) actualValue).getList());
} else if (actualValue instanceof LazyMap) {
actualValue = decodeRecordReaderMap(type, ((LazyMap) actualValue).getMap());
} else if (actualValue instanceof List) {
actualValue = decodeRecordReaderList(type, ((List<?>) actualValue));
}
return actualValue;
}