use of org.apache.hadoop.hive.serde2.lazy.LazyStruct in project hive by apache.
the class TestLazySimpleFast method testLazySimpleFast.
private void testLazySimpleFast(SerdeRandomRowSource source, Object[][] rows, LazySimpleSerDe serde, StructObjectInspector rowOI, LazySimpleSerDe serde_fewer, StructObjectInspector writeRowOI, byte separator, LazySerDeParameters serdeParams, LazySerDeParameters serdeParams_fewer, PrimitiveTypeInfo[] primitiveTypeInfos, boolean useIncludeColumns, boolean doWriteFewerColumns, Random r) throws Throwable {
int rowCount = rows.length;
int columnCount = primitiveTypeInfos.length;
boolean[] columnsToInclude = null;
if (useIncludeColumns) {
columnsToInclude = new boolean[columnCount];
for (int i = 0; i < columnCount; i++) {
columnsToInclude[i] = r.nextBoolean();
}
}
int writeColumnCount = columnCount;
PrimitiveTypeInfo[] writePrimitiveTypeInfos = primitiveTypeInfos;
if (doWriteFewerColumns) {
writeColumnCount = writeRowOI.getAllStructFieldRefs().size();
writePrimitiveTypeInfos = Arrays.copyOf(primitiveTypeInfos, writeColumnCount);
}
// Try to serialize
BytesWritable[] serializeWriteBytes = new BytesWritable[rowCount];
for (int i = 0; i < rowCount; i++) {
Object[] row = rows[i];
Output output = new Output();
LazySimpleSerializeWrite lazySimpleSerializeWrite = new LazySimpleSerializeWrite(columnCount, separator, serdeParams);
lazySimpleSerializeWrite.set(output);
for (int index = 0; index < columnCount; index++) {
Writable writable = (Writable) row[index];
VerifyFast.serializeWrite(lazySimpleSerializeWrite, primitiveTypeInfos[index], writable);
}
BytesWritable bytesWritable = new BytesWritable();
bytesWritable.set(output.getData(), 0, output.getLength());
serializeWriteBytes[i] = bytesWritable;
}
// Try to deserialize
for (int i = 0; i < rowCount; i++) {
Object[] row = rows[i];
LazySimpleDeserializeRead lazySimpleDeserializeRead = new LazySimpleDeserializeRead(writePrimitiveTypeInfos, /* useExternalBuffer */
false, separator, serdeParams);
BytesWritable bytesWritable = serializeWriteBytes[i];
byte[] bytes = bytesWritable.getBytes();
int length = bytesWritable.getLength();
lazySimpleDeserializeRead.set(bytes, 0, length);
char[] chars = new char[length];
for (int c = 0; c < chars.length; c++) {
chars[c] = (char) (bytes[c] & 0xFF);
}
for (int index = 0; index < columnCount; index++) {
if (useIncludeColumns && !columnsToInclude[index]) {
lazySimpleDeserializeRead.skipNextField();
} else if (index >= writeColumnCount) {
// Should come back a null.
VerifyFast.verifyDeserializeRead(lazySimpleDeserializeRead, primitiveTypeInfos[index], null);
} else {
Writable writable = (Writable) row[index];
VerifyFast.verifyDeserializeRead(lazySimpleDeserializeRead, primitiveTypeInfos[index], writable);
}
}
if (writeColumnCount == columnCount) {
TestCase.assertTrue(lazySimpleDeserializeRead.isEndOfInputReached());
}
}
// Try to deserialize using SerDe class our Writable row objects created by SerializeWrite.
for (int i = 0; i < rowCount; i++) {
BytesWritable bytesWritable = serializeWriteBytes[i];
LazyStruct lazySimpleStruct = (LazyStruct) serde.deserialize(bytesWritable);
Object[] row = rows[i];
for (int index = 0; index < columnCount; index++) {
PrimitiveTypeInfo primitiveTypeInfo = primitiveTypeInfos[index];
Writable writable = (Writable) row[index];
LazyPrimitive lazyPrimitive = (LazyPrimitive) lazySimpleStruct.getField(index);
Object object;
if (lazyPrimitive != null) {
object = lazyPrimitive.getWritableObject();
} else {
object = null;
}
if (writable == null || object == null) {
if (writable != null || object != null) {
fail("SerDe deserialized NULL column mismatch");
}
} else {
if (!object.equals(writable)) {
fail("SerDe deserialized value does not match");
}
}
}
}
// One Writable per row.
byte[][] serdeBytes = new byte[rowCount][];
// Serialize using the SerDe, then below deserialize using DeserializeRead.
Object[] serdeRow = new Object[columnCount];
for (int i = 0; i < rowCount; i++) {
Object[] row = rows[i];
// LazySimple seems to work better with an row object array instead of a Java object...
for (int index = 0; index < columnCount; index++) {
serdeRow[index] = row[index];
}
Text serialized = (Text) serde.serialize(serdeRow, rowOI);
byte[] bytes1 = Arrays.copyOfRange(serialized.getBytes(), 0, serialized.getLength());
byte[] bytes2 = Arrays.copyOfRange(serializeWriteBytes[i].getBytes(), 0, serializeWriteBytes[i].getLength());
if (!Arrays.equals(bytes1, bytes2)) {
fail("SerializeWrite and SerDe serialization does not match");
}
serdeBytes[i] = copyBytes(serialized);
}
// Try to deserialize using DeserializeRead our Writable row objects created by SerDe.
for (int i = 0; i < rowCount; i++) {
Object[] row = rows[i];
LazySimpleDeserializeRead lazySimpleDeserializeRead = new LazySimpleDeserializeRead(writePrimitiveTypeInfos, /* useExternalBuffer */
false, separator, serdeParams);
byte[] bytes = serdeBytes[i];
lazySimpleDeserializeRead.set(bytes, 0, bytes.length);
for (int index = 0; index < columnCount; index++) {
if (useIncludeColumns && !columnsToInclude[index]) {
lazySimpleDeserializeRead.skipNextField();
} else if (index >= writeColumnCount) {
// Should come back a null.
VerifyFast.verifyDeserializeRead(lazySimpleDeserializeRead, primitiveTypeInfos[index], null);
} else {
Writable writable = (Writable) row[index];
VerifyFast.verifyDeserializeRead(lazySimpleDeserializeRead, primitiveTypeInfos[index], writable);
}
}
if (writeColumnCount == columnCount) {
TestCase.assertTrue(lazySimpleDeserializeRead.isEndOfInputReached());
}
}
}
use of org.apache.hadoop.hive.serde2.lazy.LazyStruct in project hive by apache.
the class TestAccumuloSerDe method testStructOfMapSerialization.
@Test
public void testStructOfMapSerialization() throws IOException, SerDeException {
List<String> columns = Arrays.asList("row", "col");
List<String> structColNames = Arrays.asList("map1", "map2");
TypeInfo mapTypeInfo = TypeInfoFactory.getMapTypeInfo(TypeInfoFactory.stringTypeInfo, TypeInfoFactory.stringTypeInfo);
// struct<map1:map<string,string>,map2:map<string,string>>,string
List<TypeInfo> types = Arrays.<TypeInfo>asList(TypeInfoFactory.getStructTypeInfo(structColNames, Arrays.asList(mapTypeInfo, mapTypeInfo)), TypeInfoFactory.stringTypeInfo);
Properties tableProperties = new Properties();
tableProperties.setProperty(AccumuloSerDeParameters.COLUMN_MAPPINGS, ":rowid,cf:cq");
// Use the default separators [0, 1, 2, 3, ..., 7]
tableProperties.setProperty(serdeConstants.LIST_COLUMNS, Joiner.on(',').join(columns));
tableProperties.setProperty(serdeConstants.LIST_COLUMN_TYPES, Joiner.on(',').join(types));
AccumuloSerDeParameters accumuloSerDeParams = new AccumuloSerDeParameters(new Configuration(), tableProperties, AccumuloSerDe.class.getSimpleName());
LazySerDeParameters serDeParams = accumuloSerDeParams.getSerDeParameters();
byte[] seps = serDeParams.getSeparators();
// struct<map<k:v,k:v>_map<k:v,k:v>>>
TypeInfo stringTypeInfo = TypeInfoFactory.getPrimitiveTypeInfo(serdeConstants.STRING_TYPE_NAME);
LazyStringObjectInspector stringOI = (LazyStringObjectInspector) LazyFactory.createLazyObjectInspector(stringTypeInfo, new byte[] { 0 }, 0, serDeParams.getNullSequence(), serDeParams.isEscaped(), serDeParams.getEscapeChar());
LazyMapObjectInspector mapOI = LazyObjectInspectorFactory.getLazySimpleMapObjectInspector(stringOI, stringOI, seps[3], seps[4], serDeParams.getNullSequence(), serDeParams.isEscaped(), serDeParams.getEscapeChar());
LazySimpleStructObjectInspector rowStructOI = (LazySimpleStructObjectInspector) LazyObjectInspectorFactory.getLazySimpleStructObjectInspector(structColNames, Arrays.<ObjectInspector>asList(mapOI, mapOI), (byte) seps[2], serDeParams.getNullSequence(), serDeParams.isLastColumnTakesRest(), serDeParams.isEscaped(), serDeParams.getEscapeChar());
LazySimpleStructObjectInspector structOI = (LazySimpleStructObjectInspector) LazyObjectInspectorFactory.getLazySimpleStructObjectInspector(columns, Arrays.asList(rowStructOI, stringOI), seps[1], serDeParams.getNullSequence(), serDeParams.isLastColumnTakesRest(), serDeParams.isEscaped(), serDeParams.getEscapeChar());
AccumuloRowSerializer serializer = new AccumuloRowSerializer(0, serDeParams, accumuloSerDeParams.getColumnMappings(), new ColumnVisibility(), accumuloSerDeParams.getRowIdFactory());
Map<String, String> map1 = new HashMap<String, String>(), map2 = new HashMap<String, String>();
map1.put("key10", "value10");
map1.put("key11", "value11");
map2.put("key20", "value20");
map2.put("key21", "value21");
ByteArrayRef byteRef = new ByteArrayRef();
// Default separators are 1-indexed (instead of 0-indexed), thus the separator at offset 1 is
// (byte) 2
// The separator for the hive row is \x02, for the row Id struct, \x03, and the maps \x04 and
// \x05
String accumuloRow = "key10\5value10\4key11\5value11\3key20\5value20\4key21\5value21";
LazyStruct entireStruct = (LazyStruct) LazyFactory.createLazyObject(structOI);
byteRef.setData((accumuloRow + "\2foo").getBytes());
entireStruct.init(byteRef, 0, byteRef.getData().length);
Mutation m = serializer.serialize(entireStruct, structOI);
Assert.assertArrayEquals(accumuloRow.getBytes(), m.getRow());
Assert.assertEquals(1, m.getUpdates().size());
ColumnUpdate update = m.getUpdates().get(0);
Assert.assertEquals("cf", new String(update.getColumnFamily()));
Assert.assertEquals("cq", new String(update.getColumnQualifier()));
Assert.assertEquals("foo", new String(update.getValue()));
AccumuloHiveRow haRow = new AccumuloHiveRow(new String(m.getRow()));
haRow.add("cf", "cq", "foo".getBytes());
LazyAccumuloRow lazyAccumuloRow = new LazyAccumuloRow(structOI);
lazyAccumuloRow.init(haRow, accumuloSerDeParams.getColumnMappings(), accumuloSerDeParams.getRowIdFactory());
List<Object> objects = lazyAccumuloRow.getFieldsAsList();
Assert.assertEquals(2, objects.size());
Assert.assertEquals("foo", objects.get(1).toString());
LazyStruct rowStruct = (LazyStruct) objects.get(0);
List<Object> rowObjects = rowStruct.getFieldsAsList();
Assert.assertEquals(2, rowObjects.size());
LazyMap rowMap = (LazyMap) rowObjects.get(0);
Map<?, ?> actualMap = rowMap.getMap();
System.out.println("Actual map 1: " + actualMap);
Map<String, String> actualStringMap = new HashMap<String, String>();
for (Entry<?, ?> entry : actualMap.entrySet()) {
actualStringMap.put(entry.getKey().toString(), entry.getValue().toString());
}
Assert.assertEquals(map1, actualStringMap);
rowMap = (LazyMap) rowObjects.get(1);
actualMap = rowMap.getMap();
System.out.println("Actual map 2: " + actualMap);
actualStringMap = new HashMap<String, String>();
for (Entry<?, ?> entry : actualMap.entrySet()) {
actualStringMap.put(entry.getKey().toString(), entry.getValue().toString());
}
Assert.assertEquals(map2, actualStringMap);
}
use of org.apache.hadoop.hive.serde2.lazy.LazyStruct in project hive by apache.
the class TestAccumuloSerDe method testCompositeKeyDeserialization.
@Test
public void testCompositeKeyDeserialization() throws Exception {
Properties properties = new Properties();
Configuration conf = new Configuration();
properties.setProperty(AccumuloSerDeParameters.COLUMN_MAPPINGS, ":rowID,cf:f1");
properties.setProperty(serdeConstants.LIST_COLUMNS, "row,field1");
properties.setProperty(serdeConstants.LIST_COLUMN_TYPES, "struct<col1:string,col2:string,col3:string>,string");
properties.setProperty(DelimitedAccumuloRowIdFactory.ACCUMULO_COMPOSITE_DELIMITER, "_");
properties.setProperty(AccumuloSerDeParameters.COMPOSITE_ROWID_FACTORY, DelimitedAccumuloRowIdFactory.class.getName());
serde.initialize(conf, properties, null);
AccumuloHiveRow row = new AccumuloHiveRow();
row.setRowId("p1_p2_p3");
row.add("cf", "f1", "v1".getBytes());
Object obj = serde.deserialize(row);
assertTrue(obj instanceof LazyAccumuloRow);
LazyAccumuloRow lazyRow = (LazyAccumuloRow) obj;
Object field0 = lazyRow.getField(0);
assertNotNull(field0);
assertTrue(field0 instanceof LazyStruct);
LazyStruct struct = (LazyStruct) field0;
List<Object> fields = struct.getFieldsAsList();
assertEquals(3, fields.size());
for (int i = 0; i < fields.size(); i++) {
assertEquals(LazyString.class, fields.get(i).getClass());
assertEquals("p" + (i + 1), fields.get(i).toString());
}
Object field1 = lazyRow.getField(1);
assertNotNull(field1);
assertTrue("Expected instance of LazyString but was " + field1.getClass(), field1 instanceof LazyString);
assertEquals(field1.toString(), "v1");
}
use of org.apache.hadoop.hive.serde2.lazy.LazyStruct in project hive by apache.
the class TestAccumuloRowSerializer method testBinarySerialization.
@Test
public void testBinarySerialization() throws IOException, SerDeException {
List<String> columns = Arrays.asList("row", "cq1", "cq2", "cq3");
List<TypeInfo> types = Arrays.<TypeInfo>asList(TypeInfoFactory.stringTypeInfo, TypeInfoFactory.intTypeInfo, TypeInfoFactory.intTypeInfo, TypeInfoFactory.stringTypeInfo);
List<String> typeNames = new ArrayList<String>(types.size());
for (TypeInfo type : types) {
typeNames.add(type.getTypeName());
}
Properties tableProperties = new Properties();
tableProperties.setProperty(AccumuloSerDeParameters.COLUMN_MAPPINGS, ":rowid,cf:cq1#b,cf:cq2#b,cf:cq3");
tableProperties.setProperty(serdeConstants.FIELD_DELIM, " ");
tableProperties.setProperty(serdeConstants.LIST_COLUMNS, Joiner.on(',').join(columns));
tableProperties.setProperty(serdeConstants.LIST_COLUMN_TYPES, Joiner.on(',').join(typeNames));
AccumuloSerDeParameters accumuloSerDeParams = new AccumuloSerDeParameters(new Configuration(), tableProperties, AccumuloSerDe.class.getSimpleName());
LazySerDeParameters serDeParams = accumuloSerDeParams.getSerDeParameters();
LazySimpleStructObjectInspector oi = (LazySimpleStructObjectInspector) LazyFactory.createLazyStructInspector(columns, types, serDeParams.getSeparators(), serDeParams.getNullSequence(), serDeParams.isLastColumnTakesRest(), serDeParams.isEscaped(), serDeParams.getEscapeChar());
AccumuloRowSerializer serializer = new AccumuloRowSerializer(0, serDeParams, accumuloSerDeParams.getColumnMappings(), new ColumnVisibility(), accumuloSerDeParams.getRowIdFactory());
// Create the LazyStruct from the LazyStruct...Inspector
LazyStruct obj = (LazyStruct) LazyFactory.createLazyObject(oi);
ByteArrayRef byteRef = new ByteArrayRef();
byteRef.setData(new byte[] { 'r', 'o', 'w', '1', ' ', '1', '0', ' ', '2', '0', ' ', 'v', 'a', 'l', 'u', 'e' });
obj.init(byteRef, 0, byteRef.getData().length);
Mutation m = (Mutation) serializer.serialize(obj, oi);
Assert.assertArrayEquals("row1".getBytes(), m.getRow());
List<ColumnUpdate> updates = m.getUpdates();
Assert.assertEquals(3, updates.size());
ByteArrayOutputStream baos = new ByteArrayOutputStream();
DataOutputStream out = new DataOutputStream(baos);
ColumnUpdate update = updates.get(0);
Assert.assertEquals("cf", new String(update.getColumnFamily()));
Assert.assertEquals("cq1", new String(update.getColumnQualifier()));
out.writeInt(10);
Assert.assertArrayEquals(baos.toByteArray(), update.getValue());
update = updates.get(1);
Assert.assertEquals("cf", new String(update.getColumnFamily()));
Assert.assertEquals("cq2", new String(update.getColumnQualifier()));
baos.reset();
out.writeInt(20);
Assert.assertArrayEquals(baos.toByteArray(), update.getValue());
update = updates.get(2);
Assert.assertEquals("cf", new String(update.getColumnFamily()));
Assert.assertEquals("cq3", new String(update.getColumnQualifier()));
Assert.assertEquals("value", new String(update.getValue()));
}
use of org.apache.hadoop.hive.serde2.lazy.LazyStruct in project hive by apache.
the class TestAccumuloRowSerializer method testMapSerialization.
@Test
public void testMapSerialization() throws IOException, SerDeException {
List<String> columns = Arrays.asList("row", "col");
List<TypeInfo> types = Arrays.<TypeInfo>asList(TypeInfoFactory.stringTypeInfo, TypeInfoFactory.getMapTypeInfo(TypeInfoFactory.stringTypeInfo, TypeInfoFactory.stringTypeInfo));
List<String> typeNames = new ArrayList<String>(types.size());
for (TypeInfo type : types) {
typeNames.add(type.getTypeName());
}
Properties tableProperties = new Properties();
tableProperties.setProperty(AccumuloSerDeParameters.COLUMN_MAPPINGS, ":rowid,cf:*");
tableProperties.setProperty(serdeConstants.FIELD_DELIM, " ");
tableProperties.setProperty(serdeConstants.COLLECTION_DELIM, ",");
tableProperties.setProperty(serdeConstants.MAPKEY_DELIM, ":");
tableProperties.setProperty(serdeConstants.LIST_COLUMNS, Joiner.on(',').join(columns));
tableProperties.setProperty(serdeConstants.LIST_COLUMN_TYPES, Joiner.on(',').join(typeNames));
AccumuloSerDeParameters accumuloSerDeParams = new AccumuloSerDeParameters(new Configuration(), tableProperties, AccumuloSerDe.class.getSimpleName());
LazySerDeParameters serDeParams = accumuloSerDeParams.getSerDeParameters();
TypeInfo stringTypeInfo = TypeInfoFactory.getPrimitiveTypeInfo(serdeConstants.STRING_TYPE_NAME);
LazyStringObjectInspector stringOI = (LazyStringObjectInspector) LazyFactory.createLazyObjectInspector(stringTypeInfo, new byte[] { 0 }, 0, serDeParams.getNullSequence(), serDeParams.isEscaped(), serDeParams.getEscapeChar());
LazyMapObjectInspector mapOI = LazyObjectInspectorFactory.getLazySimpleMapObjectInspector(stringOI, stringOI, (byte) ',', (byte) ':', serDeParams.getNullSequence(), serDeParams.isEscaped(), serDeParams.getEscapeChar());
LazySimpleStructObjectInspector structOI = (LazySimpleStructObjectInspector) LazyObjectInspectorFactory.getLazySimpleStructObjectInspector(columns, Arrays.asList(stringOI, mapOI), (byte) ' ', serDeParams.getNullSequence(), serDeParams.isLastColumnTakesRest(), serDeParams.isEscaped(), serDeParams.getEscapeChar());
AccumuloRowSerializer serializer = new AccumuloRowSerializer(0, serDeParams, accumuloSerDeParams.getColumnMappings(), new ColumnVisibility(), accumuloSerDeParams.getRowIdFactory());
// Create the LazyStruct from the LazyStruct...Inspector
LazyStruct obj = (LazyStruct) LazyFactory.createLazyObject(structOI);
ByteArrayRef byteRef = new ByteArrayRef();
byteRef.setData("row1 cq1:10,cq2:20,cq3:value".getBytes());
obj.init(byteRef, 0, byteRef.getData().length);
Mutation m = (Mutation) serializer.serialize(obj, structOI);
Assert.assertArrayEquals("row1".getBytes(), m.getRow());
List<ColumnUpdate> updates = m.getUpdates();
Assert.assertEquals(3, updates.size());
ColumnUpdate update = updates.get(0);
Assert.assertEquals("cf", new String(update.getColumnFamily()));
Assert.assertEquals("cq1", new String(update.getColumnQualifier()));
Assert.assertEquals("10", new String(update.getValue()));
update = updates.get(1);
Assert.assertEquals("cf", new String(update.getColumnFamily()));
Assert.assertEquals("cq2", new String(update.getColumnQualifier()));
Assert.assertEquals("20", new String(update.getValue()));
update = updates.get(2);
Assert.assertEquals("cf", new String(update.getColumnFamily()));
Assert.assertEquals("cq3", new String(update.getColumnQualifier()));
Assert.assertEquals("value", new String(update.getValue()));
}
Aggregations