use of org.apache.hadoop.hive.accumulo.LazyAccumuloRow in project hive by apache.
the class TestAccumuloSerDe method testArraySerialization.
/**
 * Deserializes a row whose value column is typed {@code array<string>} and verifies that the
 * row ID is exposed as a LazyString and the value column as a LazyArray of three LazyStrings.
 */
@Test
public void testArraySerialization() throws Exception {
  Properties tableProperties = new Properties();
  Configuration hadoopConf = new Configuration();
  tableProperties.setProperty(AccumuloSerDeParameters.COLUMN_MAPPINGS, ":rowID,cf:vals");
  tableProperties.setProperty(serdeConstants.LIST_COLUMNS, "row,values");
  tableProperties.setProperty(serdeConstants.LIST_COLUMN_TYPES, "string,array<string>");
  tableProperties.setProperty(serdeConstants.COLLECTION_DELIM, ":");

  // Same character as the COLLECTION_DELIM configured above; used to join the array elements.
  char separator = ':';
  serde.initialize(hadoopConf, tableProperties);

  // Build "value1:value2:value3" as the raw cell content of cf:vals.
  StringBuilder cellContent = new StringBuilder();
  cellContent.append("value1").append(separator);
  cellContent.append("value2").append(separator);
  cellContent.append("value3");

  AccumuloHiveRow accumuloRow = new AccumuloHiveRow();
  accumuloRow.setRowId("r1");
  accumuloRow.add("cf", "vals", cellContent.toString().getBytes());

  Object deserialized = serde.deserialize(accumuloRow);
  assertNotNull(deserialized);
  assertTrue(deserialized instanceof LazyAccumuloRow);
  LazyAccumuloRow lazyRow = (LazyAccumuloRow) deserialized;

  // Column 0 is the row ID.
  Object rowIdField = lazyRow.getField(0);
  assertNotNull(rowIdField);
  assertTrue(rowIdField instanceof LazyString);
  LazyString lazyRowId = (LazyString) rowIdField;
  assertEquals(accumuloRow.getRowId(), lazyRowId.getWritableObject().toString());

  // Column 1 is the array; its elements must come back in insertion order.
  Object arrayField = lazyRow.getField(1);
  assertNotNull(arrayField);
  assertTrue(arrayField instanceof LazyArray);
  List<Object> elements = ((LazyArray) arrayField).getList();
  assertEquals(3, elements.size());

  int position = 0;
  for (Object element : elements) {
    position++;
    assertNotNull(element);
    assertTrue(element instanceof LazyString);
    LazyString lazyElement = (LazyString) element;
    assertEquals("value" + position, lazyElement.getWritableObject().toString());
  }
}
use of org.apache.hadoop.hive.accumulo.LazyAccumuloRow in project hive by apache.
the class TestAccumuloSerDe method deserialization.
/**
 * Deserializes a row that supplies values for only two of the three mapped column qualifiers
 * and checks the row ID and the two populated columns.
 *
 * <p>No LIST_COLUMN_TYPES property is supplied; the assertions expect every inspected field to
 * be a LazyString.
 */
@Test
public void deserialization() throws Exception {
  Properties properties = new Properties();
  Configuration conf = new Configuration();
  // Four Hive columns: the row ID followed by three qualifiers in column family "cf".
  properties.setProperty(AccumuloSerDeParameters.COLUMN_MAPPINGS, ":rowID,cf:f1,cf:f2,cf:f3");
  properties.setProperty(serdeConstants.LIST_COLUMNS, "blah,field2,field3,field4");
  serde.initialize(conf, properties);

  // Only cf:f1 and cf:f2 receive a value; cf:f3 is absent from this row.
  AccumuloHiveRow row = new AccumuloHiveRow();
  row.setRowId("r1");
  row.add("cf", "f1", "v1".getBytes());
  row.add("cf", "f2", "v2".getBytes());

  Object obj = serde.deserialize(row);
  assertTrue(obj instanceof LazyAccumuloRow);
  LazyAccumuloRow lazyRow = (LazyAccumuloRow) obj;

  // JUnit's assertEquals takes (expected, actual); keeping that order makes failure output
  // report the literal as the expectation rather than the deserialized value.
  Object field0 = lazyRow.getField(0);
  assertNotNull(field0);
  assertTrue("Expected instance of LazyString but was " + field0.getClass(), field0 instanceof LazyString);
  assertEquals("r1", field0.toString());

  Object field1 = lazyRow.getField(1);
  assertNotNull(field1);
  assertTrue("Expected instance of LazyString but was " + field1.getClass(), field1 instanceof LazyString);
  assertEquals("v1", field1.toString());

  Object field2 = lazyRow.getField(2);
  assertNotNull(field2);
  assertTrue("Expected instance of LazyString but was " + field2.getClass(), field2 instanceof LazyString);
  assertEquals("v2", field2.toString());
}
use of org.apache.hadoop.hive.accumulo.LazyAccumuloRow in project hive by apache.
the class TestAccumuloSerDe method testStructOfMapSerialization.
/**
 * Round-trips a Hive row whose row ID is a struct of two {@code map<string,string>} values:
 * the row is serialized to a Mutation, the Mutation's row/column/value are checked, and the
 * result is read back through a LazyAccumuloRow and compared against the expected maps.
 */
@Test
public void testStructOfMapSerialization() throws IOException, SerDeException {
  List<String> hiveColumns = Arrays.asList("row", "col");
  List<String> rowIdFieldNames = Arrays.asList("map1", "map2");

  // Hive schema: struct<map1:map<string,string>,map2:map<string,string>>,string
  TypeInfo stringMapType = TypeInfoFactory.getMapTypeInfo(TypeInfoFactory.stringTypeInfo, TypeInfoFactory.stringTypeInfo);
  TypeInfo rowIdStructType = TypeInfoFactory.getStructTypeInfo(rowIdFieldNames, Arrays.asList(stringMapType, stringMapType));
  List<TypeInfo> hiveTypes = Arrays.<TypeInfo>asList(rowIdStructType, TypeInfoFactory.stringTypeInfo);

  // No delimiter properties are set, so the default separators are used.
  Properties tableProperties = new Properties();
  tableProperties.setProperty(AccumuloSerDeParameters.COLUMN_MAPPINGS, ":rowid,cf:cq");
  tableProperties.setProperty(serdeConstants.LIST_COLUMNS, Joiner.on(',').join(hiveColumns));
  tableProperties.setProperty(serdeConstants.LIST_COLUMN_TYPES, Joiner.on(',').join(hiveTypes));

  AccumuloSerDeParameters accumuloParams = new AccumuloSerDeParameters(new Configuration(), tableProperties, AccumuloSerDe.class.getSimpleName());
  LazySerDeParameters lazyParams = accumuloParams.getSerDeParameters();
  byte[] separators = lazyParams.getSeparators();

  // Object inspectors, innermost first: string -> map<string,string> -> row-ID struct -> row.
  TypeInfo primitiveStringType = TypeInfoFactory.getPrimitiveTypeInfo(serdeConstants.STRING_TYPE_NAME);
  LazyStringObjectInspector stringOI = (LazyStringObjectInspector) LazyFactory.createLazyObjectInspector(primitiveStringType, new byte[] { 0 }, 0, lazyParams.getNullSequence(), lazyParams.isEscaped(), lazyParams.getEscapeChar());
  LazyMapObjectInspector mapOI = LazyObjectInspectorFactory.getLazySimpleMapObjectInspector(stringOI, stringOI, separators[3], separators[4], lazyParams.getNullSequence(), lazyParams.isEscaped(), lazyParams.getEscapeChar());
  LazySimpleStructObjectInspector rowIdOI = (LazySimpleStructObjectInspector) LazyObjectInspectorFactory.getLazySimpleStructObjectInspector(rowIdFieldNames, Arrays.<ObjectInspector>asList(mapOI, mapOI), separators[2], lazyParams.getNullSequence(), lazyParams.isLastColumnTakesRest(), lazyParams.isEscaped(), lazyParams.getEscapeChar());
  LazySimpleStructObjectInspector hiveRowOI = (LazySimpleStructObjectInspector) LazyObjectInspectorFactory.getLazySimpleStructObjectInspector(hiveColumns, Arrays.asList(rowIdOI, stringOI), separators[1], lazyParams.getNullSequence(), lazyParams.isLastColumnTakesRest(), lazyParams.isEscaped(), lazyParams.getEscapeChar());

  AccumuloRowSerializer rowSerializer = new AccumuloRowSerializer(0, lazyParams, accumuloParams.getColumnMappings(), new ColumnVisibility(), accumuloParams.getRowIdFactory());

  // Separator bytes used in the literals below match the inspectors above:
  // \2 splits the Hive row's columns, \3 splits the two maps inside the row-ID struct,
  // \4 splits map entries and \5 splits a key from its value.
  String expectedRowId = "key10\5value10\4key11\5value11\3key20\5value20\4key21\5value21";
  ByteArrayRef serializedInput = new ByteArrayRef();
  LazyStruct inputStruct = (LazyStruct) LazyFactory.createLazyObject(hiveRowOI);
  serializedInput.setData((expectedRowId + "\2foo").getBytes());
  inputStruct.init(serializedInput, 0, serializedInput.getData().length);

  // Serialize: the struct becomes the Accumulo row, "foo" becomes the single cf:cq update.
  Mutation mutation = rowSerializer.serialize(inputStruct, hiveRowOI);
  Assert.assertArrayEquals(expectedRowId.getBytes(), mutation.getRow());
  Assert.assertEquals(1, mutation.getUpdates().size());
  ColumnUpdate onlyUpdate = mutation.getUpdates().get(0);
  Assert.assertEquals("cf", new String(onlyUpdate.getColumnFamily()));
  Assert.assertEquals("cq", new String(onlyUpdate.getColumnQualifier()));
  Assert.assertEquals("foo", new String(onlyUpdate.getValue()));

  // Deserialize: rebuild an AccumuloHiveRow from the mutation and read it back lazily.
  AccumuloHiveRow storedRow = new AccumuloHiveRow(new String(mutation.getRow()));
  storedRow.add("cf", "cq", "foo".getBytes());
  LazyAccumuloRow lazyRow = new LazyAccumuloRow(hiveRowOI);
  lazyRow.init(storedRow, accumuloParams.getColumnMappings(), accumuloParams.getRowIdFactory());

  List<Object> rowFields = lazyRow.getFieldsAsList();
  Assert.assertEquals(2, rowFields.size());
  Assert.assertEquals("foo", rowFields.get(1).toString());
  LazyStruct rowIdStruct = (LazyStruct) rowFields.get(0);
  List<Object> rowIdFields = rowIdStruct.getFieldsAsList();
  Assert.assertEquals(2, rowIdFields.size());

  // First map of the row-ID struct.
  Map<String, String> expectedFirst = new HashMap<String, String>();
  expectedFirst.put("key10", "value10");
  expectedFirst.put("key11", "value11");
  LazyMap firstLazyMap = (LazyMap) rowIdFields.get(0);
  Map<?, ?> firstRaw = firstLazyMap.getMap();
  System.out.println("Actual map 1: " + firstRaw);
  Map<String, String> firstAsStrings = new HashMap<String, String>();
  for (Entry<?, ?> pair : firstRaw.entrySet()) {
    firstAsStrings.put(pair.getKey().toString(), pair.getValue().toString());
  }
  Assert.assertEquals(expectedFirst, firstAsStrings);

  // Second map of the row-ID struct.
  Map<String, String> expectedSecond = new HashMap<String, String>();
  expectedSecond.put("key20", "value20");
  expectedSecond.put("key21", "value21");
  LazyMap secondLazyMap = (LazyMap) rowIdFields.get(1);
  Map<?, ?> secondRaw = secondLazyMap.getMap();
  System.out.println("Actual map 2: " + secondRaw);
  Map<String, String> secondAsStrings = new HashMap<String, String>();
  for (Entry<?, ?> pair : secondRaw.entrySet()) {
    secondAsStrings.put(pair.getKey().toString(), pair.getValue().toString());
  }
  Assert.assertEquals(expectedSecond, secondAsStrings);
}
use of org.apache.hadoop.hive.accumulo.LazyAccumuloRow in project hive by apache.
the class TestAccumuloSerDe method testMapSerialization.
/**
 * Deserializes a row whose value column is typed {@code map<string,string>} and verifies that
 * the row ID is a LazyString and that the map holds exactly the entries k1=v1, k2=v2, k3=v3.
 */
@Test
public void testMapSerialization() throws Exception {
  Properties tableProperties = new Properties();
  Configuration hadoopConf = new Configuration();
  tableProperties.setProperty(AccumuloSerDeParameters.COLUMN_MAPPINGS, ":rowID,cf:vals");
  tableProperties.setProperty(serdeConstants.LIST_COLUMNS, "row,values");
  tableProperties.setProperty(serdeConstants.LIST_COLUMN_TYPES, "string,map<string,string>");
  tableProperties.setProperty(serdeConstants.COLLECTION_DELIM, ":");
  tableProperties.setProperty(serdeConstants.MAPKEY_DELIM, "=");

  // Same characters as the COLLECTION_DELIM and MAPKEY_DELIM configured above.
  char entrySeparator = ':';
  char keyValueSeparator = '=';
  serde.initialize(hadoopConf, tableProperties);

  // Build "k1=v1:k2=v2:k3=v3" as the raw cell content of cf:vals.
  StringBuilder cellContent = new StringBuilder();
  cellContent.append("k1").append(keyValueSeparator).append("v1").append(entrySeparator);
  cellContent.append("k2").append(keyValueSeparator).append("v2").append(entrySeparator);
  cellContent.append("k3").append(keyValueSeparator).append("v3");

  AccumuloHiveRow accumuloRow = new AccumuloHiveRow();
  accumuloRow.setRowId("r1");
  accumuloRow.add("cf", "vals", cellContent.toString().getBytes());

  Object deserialized = serde.deserialize(accumuloRow);
  assertNotNull(deserialized);
  assertTrue(deserialized instanceof LazyAccumuloRow);
  LazyAccumuloRow lazyRow = (LazyAccumuloRow) deserialized;

  // Column 0 is the row ID.
  Object rowIdField = lazyRow.getField(0);
  assertNotNull(rowIdField);
  assertTrue(rowIdField instanceof LazyString);
  LazyString lazyRowId = (LazyString) rowIdField;
  assertEquals(accumuloRow.getRowId(), lazyRowId.getWritableObject().toString());

  // Column 1 is the map.
  Object mapField = lazyRow.getField(1);
  assertNotNull(mapField);
  assertTrue(mapField instanceof LazyMap);
  LazyMap lazyMap = (LazyMap) mapField;
  Map<Object, Object> entries = lazyMap.getMap();
  assertEquals(3, lazyMap.getMapSize());

  // Each key is removed as it is seen, so anything left at the end was never returned.
  Set<String> keysNotYetSeen = new HashSet<String>();
  keysNotYetSeen.add("k1");
  keysNotYetSeen.add("k2");
  keysNotYetSeen.add("k3");

  for (Entry<Object, Object> pair : entries.entrySet()) {
    Object rawKey = pair.getKey();
    assertNotNull(rawKey);
    assertTrue(rawKey instanceof LazyString);
    LazyString lazyKey = (LazyString) rawKey;

    Object rawValue = pair.getValue();
    assertNotNull(rawValue);
    assertTrue(rawValue instanceof LazyString);
    LazyString lazyValue = (LazyString) rawValue;

    String keyText = lazyKey.getWritableObject().toString();
    String valueText = lazyValue.getWritableObject().toString();
    assertTrue(keysNotYetSeen.remove(keyText));

    // The value must be "v" followed by the same digit that follows "k" in the key.
    assertEquals(2, valueText.length());
    assertTrue(valueText.startsWith("v"));
    assertTrue(valueText.endsWith(keyText.substring(1, 2)));
  }
  assertTrue("Did not find expected keys: " + keysNotYetSeen, keysNotYetSeen.isEmpty());
}
use of org.apache.hadoop.hive.accumulo.LazyAccumuloRow in project hive by apache.
the class TestAccumuloSerDe method testCompositeKeyDeserialization.
/**
 * Deserializes a row whose row ID is a composite key: the row ID "p1_p2_p3" is configured to be
 * split on "_" by DelimitedAccumuloRowIdFactory into a three-field struct, alongside one plain
 * string column.
 */
@Test
public void testCompositeKeyDeserialization() throws Exception {
  Properties properties = new Properties();
  Configuration conf = new Configuration();
  properties.setProperty(AccumuloSerDeParameters.COLUMN_MAPPINGS, ":rowID,cf:f1");
  properties.setProperty(serdeConstants.LIST_COLUMNS, "row,field1");
  properties.setProperty(serdeConstants.LIST_COLUMN_TYPES, "struct<col1:string,col2:string,col3:string>,string");
  // Composite row ID handling: "_" as the delimiter, parsed by the delimited row-ID factory.
  properties.setProperty(DelimitedAccumuloRowIdFactory.ACCUMULO_COMPOSITE_DELIMITER, "_");
  properties.setProperty(AccumuloSerDeParameters.COMPOSITE_ROWID_FACTORY, DelimitedAccumuloRowIdFactory.class.getName());
  serde.initialize(conf, properties);

  AccumuloHiveRow row = new AccumuloHiveRow();
  row.setRowId("p1_p2_p3");
  row.add("cf", "f1", "v1".getBytes());

  Object obj = serde.deserialize(row);
  assertTrue(obj instanceof LazyAccumuloRow);
  LazyAccumuloRow lazyRow = (LazyAccumuloRow) obj;

  // Column 0: the row ID, expected as a struct with fields "p1", "p2", "p3" in order.
  Object field0 = lazyRow.getField(0);
  assertNotNull(field0);
  assertTrue("Expected instance of LazyStruct but was " + field0.getClass(), field0 instanceof LazyStruct);
  LazyStruct struct = (LazyStruct) field0;
  List<Object> fields = struct.getFieldsAsList();
  assertEquals(3, fields.size());
  for (int i = 0; i < fields.size(); i++) {
    assertEquals(LazyString.class, fields.get(i).getClass());
    assertEquals("p" + (i + 1), fields.get(i).toString());
  }

  // Column 1: the plain string value stored at cf:f1.
  Object field1 = lazyRow.getField(1);
  assertNotNull(field1);
  assertTrue("Expected instance of LazyString but was " + field1.getClass(), field1 instanceof LazyString);
  // Expected value first, matching JUnit's assertEquals(expected, actual) contract.
  assertEquals("v1", field1.toString());
}
Aggregations