
Example 1 with AccumuloHiveRow

Use of org.apache.hadoop.hive.accumulo.AccumuloHiveRow in project hive by apache.

From class TestAccumuloSerDe, method deserialization.

@Test
public void deserialization() throws Exception {
    Properties properties = new Properties();
    Configuration conf = new Configuration();
    properties.setProperty(AccumuloSerDeParameters.COLUMN_MAPPINGS, ":rowID,cf:f1,cf:f2,cf:f3");
    properties.setProperty(serdeConstants.LIST_COLUMNS, "blah,field2,field3,field4");
    serde.initialize(conf, properties, null);
    AccumuloHiveRow row = new AccumuloHiveRow();
    row.setRowId("r1");
    row.add("cf", "f1", "v1".getBytes());
    row.add("cf", "f2", "v2".getBytes());
    Object obj = serde.deserialize(row);
    assertTrue(obj instanceof LazyAccumuloRow);
    LazyAccumuloRow lazyRow = (LazyAccumuloRow) obj;
    Object field0 = lazyRow.getField(0);
    assertNotNull(field0);
    assertTrue(field0 instanceof LazyString);
    assertEquals(field0.toString(), "r1");
    Object field1 = lazyRow.getField(1);
    assertNotNull(field1);
    assertTrue("Expected instance of LazyString but was " + field1.getClass(), field1 instanceof LazyString);
    assertEquals(field1.toString(), "v1");
    Object field2 = lazyRow.getField(2);
    assertNotNull(field2);
    assertTrue(field2 instanceof LazyString);
    assertEquals(field2.toString(), "v2");
}
Also used: LazyString (org.apache.hadoop.hive.serde2.lazy.LazyString), Configuration (org.apache.hadoop.conf.Configuration), LazyAccumuloRow (org.apache.hadoop.hive.accumulo.LazyAccumuloRow), Properties (java.util.Properties), AccumuloHiveRow (org.apache.hadoop.hive.accumulo.AccumuloHiveRow), Test (org.junit.Test)
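
The tests on this page call serde.initialize(...) on a serde field that is created in the test fixture rather than in the method bodies. A minimal sketch of that fixture, assuming the usual JUnit @Before pattern (the import path and setup shape are inferred from the usage above, not copied from the Hive source):

import org.apache.hadoop.hive.accumulo.serde.AccumuloSerDe;
import org.junit.Before;

public class TestAccumuloSerDe {

    // SerDe under test; re-created before each test method
    private AccumuloSerDe serde;

    @Before
    public void setup() {
        serde = new AccumuloSerDe();
    }
}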

Example 2 with AccumuloHiveRow

Use of org.apache.hadoop.hive.accumulo.AccumuloHiveRow in project hive by apache.

From class TestAccumuloSerDe, method testMapSerialization.

@Test
public void testMapSerialization() throws Exception {
    Properties properties = new Properties();
    Configuration conf = new Configuration();
    properties.setProperty(AccumuloSerDeParameters.COLUMN_MAPPINGS, ":rowID,cf:vals");
    properties.setProperty(serdeConstants.LIST_COLUMNS, "row,values");
    properties.setProperty(serdeConstants.LIST_COLUMN_TYPES, "string,map<string,string>");
    properties.setProperty(serdeConstants.COLLECTION_DELIM, ":");
    properties.setProperty(serdeConstants.MAPKEY_DELIM, "=");
    // Mirror the collection and map-key delimiters configured above when building the cell value
    char collectionSeparator = ':', kvSeparator = '=';
    serde.initialize(conf, properties, null);
    AccumuloHiveRow row = new AccumuloHiveRow();
    row.setRowId("r1");
    row.add("cf", "vals", ("k1" + kvSeparator + "v1" + collectionSeparator + "k2" + kvSeparator + "v2" + collectionSeparator + "k3" + kvSeparator + "v3").getBytes());
    Object obj = serde.deserialize(row);
    assertNotNull(obj);
    assertTrue(obj instanceof LazyAccumuloRow);
    LazyAccumuloRow lazyRow = (LazyAccumuloRow) obj;
    Object field0 = lazyRow.getField(0);
    assertNotNull(field0);
    assertTrue(field0 instanceof LazyString);
    assertEquals(row.getRowId(), ((LazyString) field0).getWritableObject().toString());
    Object field1 = lazyRow.getField(1);
    assertNotNull(field1);
    assertTrue(field1 instanceof LazyMap);
    LazyMap map = (LazyMap) field1;
    Map<Object, Object> untypedMap = map.getMap();
    assertEquals(3, map.getMapSize());
    Set<String> expectedKeys = new HashSet<String>();
    expectedKeys.add("k1");
    expectedKeys.add("k2");
    expectedKeys.add("k3");
    for (Entry<Object, Object> entry : untypedMap.entrySet()) {
        assertNotNull(entry.getKey());
        assertTrue(entry.getKey() instanceof LazyString);
        LazyString key = (LazyString) entry.getKey();
        assertNotNull(entry.getValue());
        assertTrue(entry.getValue() instanceof LazyString);
        LazyString value = (LazyString) entry.getValue();
        String strKey = key.getWritableObject().toString(), strValue = value.getWritableObject().toString();
        assertTrue(expectedKeys.remove(strKey));
        assertEquals(2, strValue.length());
        assertTrue(strValue.startsWith("v"));
        assertTrue(strValue.endsWith(Character.toString(strKey.charAt(1))));
    }
    assertTrue("Did not find expected keys: " + expectedKeys, expectedKeys.isEmpty());
}
Also used: Configuration (org.apache.hadoop.conf.Configuration), LazyAccumuloRow (org.apache.hadoop.hive.accumulo.LazyAccumuloRow), LazyMap (org.apache.hadoop.hive.serde2.lazy.LazyMap), LazyString (org.apache.hadoop.hive.serde2.lazy.LazyString), Properties (java.util.Properties), AccumuloHiveRow (org.apache.hadoop.hive.accumulo.AccumuloHiveRow), HashSet (java.util.HashSet), Test (org.junit.Test)
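
To see what the COLLECTION_DELIM (":") and MAPKEY_DELIM ("=") properties imply for the cf:vals cell above, here is a short, self-contained sketch of the splitting they describe. It only mirrors the delimiter convention conceptually; it is not the LazyMap implementation.

import java.util.HashMap;
import java.util.Map;

public class MapDelimiterSketch {
    public static void main(String[] args) {
        // The raw value stored in the cf:vals cell of the test above
        String cell = "k1=v1:k2=v2:k3=v3";

        // ':' separates map entries, '=' separates a key from its value
        Map<String, String> decoded = new HashMap<>();
        for (String entry : cell.split(":")) {
            String[] kv = entry.split("=", 2);
            decoded.put(kv[0], kv[1]);
        }

        // Prints {k1=v1, k2=v2, k3=v3}
        System.out.println(decoded);
    }
}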

Example 3 with AccumuloHiveRow

Use of org.apache.hadoop.hive.accumulo.AccumuloHiveRow in project hive by apache.

From class TestAccumuloSerDe, method testStructOfMapSerialization.

@Test
public void testStructOfMapSerialization() throws IOException, SerDeException {
    List<String> columns = Arrays.asList("row", "col");
    List<String> structColNames = Arrays.asList("map1", "map2");
    TypeInfo mapTypeInfo = TypeInfoFactory.getMapTypeInfo(TypeInfoFactory.stringTypeInfo, TypeInfoFactory.stringTypeInfo);
    // struct<map1:map<string,string>,map2:map<string,string>>,string
    List<TypeInfo> types = Arrays.<TypeInfo>asList(TypeInfoFactory.getStructTypeInfo(structColNames, Arrays.asList(mapTypeInfo, mapTypeInfo)), TypeInfoFactory.stringTypeInfo);
    Properties tableProperties = new Properties();
    tableProperties.setProperty(AccumuloSerDeParameters.COLUMN_MAPPINGS, ":rowid,cf:cq");
    // Rely on the default LazySerDeParameters separators (seps[i] is the byte value i + 1)
    tableProperties.setProperty(serdeConstants.LIST_COLUMNS, Joiner.on(',').join(columns));
    tableProperties.setProperty(serdeConstants.LIST_COLUMN_TYPES, Joiner.on(',').join(types));
    AccumuloSerDeParameters accumuloSerDeParams = new AccumuloSerDeParameters(new Configuration(), tableProperties, AccumuloSerDe.class.getSimpleName());
    LazySerDeParameters serDeParams = accumuloSerDeParams.getSerDeParameters();
    byte[] seps = serDeParams.getSeparators();
    // Build object inspectors for struct<map1:map<string,string>,map2:map<string,string>>
    TypeInfo stringTypeInfo = TypeInfoFactory.getPrimitiveTypeInfo(serdeConstants.STRING_TYPE_NAME);
    LazyStringObjectInspector stringOI = (LazyStringObjectInspector) LazyFactory.createLazyObjectInspector(stringTypeInfo, new byte[] { 0 }, 0, serDeParams.getNullSequence(), serDeParams.isEscaped(), serDeParams.getEscapeChar());
    LazyMapObjectInspector mapOI = LazyObjectInspectorFactory.getLazySimpleMapObjectInspector(stringOI, stringOI, seps[3], seps[4], serDeParams.getNullSequence(), serDeParams.isEscaped(), serDeParams.getEscapeChar());
    LazySimpleStructObjectInspector rowStructOI = (LazySimpleStructObjectInspector) LazyObjectInspectorFactory.getLazySimpleStructObjectInspector(structColNames, Arrays.<ObjectInspector>asList(mapOI, mapOI), (byte) seps[2], serDeParams.getNullSequence(), serDeParams.isLastColumnTakesRest(), serDeParams.isEscaped(), serDeParams.getEscapeChar());
    LazySimpleStructObjectInspector structOI = (LazySimpleStructObjectInspector) LazyObjectInspectorFactory.getLazySimpleStructObjectInspector(columns, Arrays.asList(rowStructOI, stringOI), seps[1], serDeParams.getNullSequence(), serDeParams.isLastColumnTakesRest(), serDeParams.isEscaped(), serDeParams.getEscapeChar());
    AccumuloRowSerializer serializer = new AccumuloRowSerializer(0, serDeParams, accumuloSerDeParams.getColumnMappings(), new ColumnVisibility(), accumuloSerDeParams.getRowIdFactory());
    Map<String, String> map1 = new HashMap<String, String>(), map2 = new HashMap<String, String>();
    map1.put("key10", "value10");
    map1.put("key11", "value11");
    map2.put("key20", "value20");
    map2.put("key21", "value21");
    ByteArrayRef byteRef = new ByteArrayRef();
    // Default separators are 1-indexed (instead of 0-indexed), thus the separator at offset 1 is
    // (byte) 2
    // The separator for the hive row is \x02, for the row Id struct, \x03, and the maps \x04 and
    // \x05
    String accumuloRow = "key10\5value10\4key11\5value11\3key20\5value20\4key21\5value21";
    LazyStruct entireStruct = (LazyStruct) LazyFactory.createLazyObject(structOI);
    byteRef.setData((accumuloRow + "\2foo").getBytes());
    entireStruct.init(byteRef, 0, byteRef.getData().length);
    Mutation m = serializer.serialize(entireStruct, structOI);
    Assert.assertArrayEquals(accumuloRow.getBytes(), m.getRow());
    Assert.assertEquals(1, m.getUpdates().size());
    ColumnUpdate update = m.getUpdates().get(0);
    Assert.assertEquals("cf", new String(update.getColumnFamily()));
    Assert.assertEquals("cq", new String(update.getColumnQualifier()));
    Assert.assertEquals("foo", new String(update.getValue()));
    AccumuloHiveRow haRow = new AccumuloHiveRow(new String(m.getRow()));
    haRow.add("cf", "cq", "foo".getBytes());
    LazyAccumuloRow lazyAccumuloRow = new LazyAccumuloRow(structOI);
    lazyAccumuloRow.init(haRow, accumuloSerDeParams.getColumnMappings(), accumuloSerDeParams.getRowIdFactory());
    List<Object> objects = lazyAccumuloRow.getFieldsAsList();
    Assert.assertEquals(2, objects.size());
    Assert.assertEquals("foo", objects.get(1).toString());
    LazyStruct rowStruct = (LazyStruct) objects.get(0);
    List<Object> rowObjects = rowStruct.getFieldsAsList();
    Assert.assertEquals(2, rowObjects.size());
    LazyMap rowMap = (LazyMap) rowObjects.get(0);
    Map<?, ?> actualMap = rowMap.getMap();
    System.out.println("Actual map 1: " + actualMap);
    Map<String, String> actualStringMap = new HashMap<String, String>();
    for (Entry<?, ?> entry : actualMap.entrySet()) {
        actualStringMap.put(entry.getKey().toString(), entry.getValue().toString());
    }
    Assert.assertEquals(map1, actualStringMap);
    rowMap = (LazyMap) rowObjects.get(1);
    actualMap = rowMap.getMap();
    System.out.println("Actual map 2: " + actualMap);
    actualStringMap = new HashMap<String, String>();
    for (Entry<?, ?> entry : actualMap.entrySet()) {
        actualStringMap.put(entry.getKey().toString(), entry.getValue().toString());
    }
    Assert.assertEquals(map2, actualStringMap);
}
Also used: ColumnUpdate (org.apache.accumulo.core.data.ColumnUpdate), Configuration (org.apache.hadoop.conf.Configuration), LazySerDeParameters (org.apache.hadoop.hive.serde2.lazy.LazySerDeParameters), HashMap (java.util.HashMap), LazyString (org.apache.hadoop.hive.serde2.lazy.LazyString), Properties (java.util.Properties), AccumuloHiveRow (org.apache.hadoop.hive.accumulo.AccumuloHiveRow), ColumnVisibility (org.apache.accumulo.core.security.ColumnVisibility), LazyStruct (org.apache.hadoop.hive.serde2.lazy.LazyStruct), LazySimpleStructObjectInspector (org.apache.hadoop.hive.serde2.lazy.objectinspector.LazySimpleStructObjectInspector), LazyStringObjectInspector (org.apache.hadoop.hive.serde2.lazy.objectinspector.primitive.LazyStringObjectInspector), ObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector), LazyMapObjectInspector (org.apache.hadoop.hive.serde2.lazy.objectinspector.LazyMapObjectInspector), LazyAccumuloRow (org.apache.hadoop.hive.accumulo.LazyAccumuloRow), LazyMap (org.apache.hadoop.hive.serde2.lazy.LazyMap), TypeInfo (org.apache.hadoop.hive.serde2.typeinfo.TypeInfo), ByteArrayRef (org.apache.hadoop.hive.serde2.lazy.ByteArrayRef), Mutation (org.apache.accumulo.core.data.Mutation), Test (org.junit.Test)
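
The separator arithmetic in the comments of testStructOfMapSerialization is easy to misread, so here is a hedged sketch that rebuilds the same accumuloRow literal from separator byte values instead of octal escapes. It assumes the default separators described in those comments (seps[2] = \x03 between the two maps, seps[3] = \x04 between map entries, seps[4] = \x05 between key and value).

public class SeparatorLayoutSketch {
    public static void main(String[] args) {
        // Assumed default separator bytes, matching the comments in the test
        char structSep = 3;  // seps[2]: separates map1 from map2 inside the row-id struct
        char collSep   = 4;  // seps[3]: separates entries within one map
        char kvSep     = 5;  // seps[4]: separates a key from its value

        String map1 = "key10" + kvSep + "value10" + collSep + "key11" + kvSep + "value11";
        String map2 = "key20" + kvSep + "value20" + collSep + "key21" + kvSep + "value21";
        String rowId = map1 + structSep + map2;

        // Prints true: identical to the literal used in testStructOfMapSerialization
        System.out.println(rowId.equals("key10\5value10\4key11\5value11\3key20\5value20\4key21\5value21"));
    }
}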

Example 4 with AccumuloHiveRow

Use of org.apache.hadoop.hive.accumulo.AccumuloHiveRow in project hive by apache.

From class TestAccumuloSerDe, method testArraySerialization.

@Test
public void testArraySerialization() throws Exception {
    Properties properties = new Properties();
    Configuration conf = new Configuration();
    properties.setProperty(AccumuloSerDeParameters.COLUMN_MAPPINGS, ":rowID,cf:vals");
    properties.setProperty(serdeConstants.LIST_COLUMNS, "row,values");
    properties.setProperty(serdeConstants.LIST_COLUMN_TYPES, "string,array<string>");
    properties.setProperty(serdeConstants.COLLECTION_DELIM, ":");
    // Mirror the collection delimiter configured above when building the cell value
    char separator = ':';
    serde.initialize(conf, properties, null);
    AccumuloHiveRow row = new AccumuloHiveRow();
    row.setRowId("r1");
    row.add("cf", "vals", ("value1" + separator + "value2" + separator + "value3").getBytes());
    Object obj = serde.deserialize(row);
    assertNotNull(obj);
    assertTrue(obj instanceof LazyAccumuloRow);
    LazyAccumuloRow lazyRow = (LazyAccumuloRow) obj;
    Object field0 = lazyRow.getField(0);
    assertNotNull(field0);
    assertTrue(field0 instanceof LazyString);
    assertEquals(row.getRowId(), ((LazyString) field0).getWritableObject().toString());
    Object field1 = lazyRow.getField(1);
    assertNotNull(field1);
    assertTrue(field1 instanceof LazyArray);
    LazyArray array = (LazyArray) field1;
    List<Object> values = array.getList();
    assertEquals(3, values.size());
    for (int i = 0; i < 3; i++) {
        Object o = values.get(i);
        assertNotNull(o);
        assertTrue(o instanceof LazyString);
        assertEquals("value" + (i + 1), ((LazyString) o).getWritableObject().toString());
    }
}
Also used: LazyString (org.apache.hadoop.hive.serde2.lazy.LazyString), Configuration (org.apache.hadoop.conf.Configuration), LazyAccumuloRow (org.apache.hadoop.hive.accumulo.LazyAccumuloRow), LazyArray (org.apache.hadoop.hive.serde2.lazy.LazyArray), Properties (java.util.Properties), AccumuloHiveRow (org.apache.hadoop.hive.accumulo.AccumuloHiveRow), Test (org.junit.Test)
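
As with the map example, a small sketch of what the ":" COLLECTION_DELIM means for the array value in this test; again this only mirrors the delimiter convention, not the LazyArray internals.

import java.util.Arrays;
import java.util.List;

public class ArrayDelimiterSketch {
    public static void main(String[] args) {
        // The raw value stored in the cf:vals cell of the test above
        String cell = "value1:value2:value3";

        // ':' separates the array elements
        List<String> elements = Arrays.asList(cell.split(":"));

        // Prints [value1, value2, value3]
        System.out.println(elements);
    }
}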

Example 5 with AccumuloHiveRow

Use of org.apache.hadoop.hive.accumulo.AccumuloHiveRow in project hive by apache.

From class TestAccumuloSerDe, method testCompositeKeyDeserialization.

@Test
public void testCompositeKeyDeserialization() throws Exception {
    Properties properties = new Properties();
    Configuration conf = new Configuration();
    properties.setProperty(AccumuloSerDeParameters.COLUMN_MAPPINGS, ":rowID,cf:f1");
    properties.setProperty(serdeConstants.LIST_COLUMNS, "row,field1");
    properties.setProperty(serdeConstants.LIST_COLUMN_TYPES, "struct<col1:string,col2:string,col3:string>,string");
    properties.setProperty(DelimitedAccumuloRowIdFactory.ACCUMULO_COMPOSITE_DELIMITER, "_");
    properties.setProperty(AccumuloSerDeParameters.COMPOSITE_ROWID_FACTORY, DelimitedAccumuloRowIdFactory.class.getName());
    serde.initialize(conf, properties, null);
    AccumuloHiveRow row = new AccumuloHiveRow();
    row.setRowId("p1_p2_p3");
    row.add("cf", "f1", "v1".getBytes());
    Object obj = serde.deserialize(row);
    assertTrue(obj instanceof LazyAccumuloRow);
    LazyAccumuloRow lazyRow = (LazyAccumuloRow) obj;
    Object field0 = lazyRow.getField(0);
    assertNotNull(field0);
    assertTrue(field0 instanceof LazyStruct);
    LazyStruct struct = (LazyStruct) field0;
    List<Object> fields = struct.getFieldsAsList();
    assertEquals(3, fields.size());
    for (int i = 0; i < fields.size(); i++) {
        assertEquals(LazyString.class, fields.get(i).getClass());
        assertEquals("p" + (i + 1), fields.get(i).toString());
    }
    Object field1 = lazyRow.getField(1);
    assertNotNull(field1);
    assertTrue("Expected instance of LazyString but was " + field1.getClass(), field1 instanceof LazyString);
    assertEquals(field1.toString(), "v1");
}
Also used: LazyString (org.apache.hadoop.hive.serde2.lazy.LazyString), Configuration (org.apache.hadoop.conf.Configuration), LazyAccumuloRow (org.apache.hadoop.hive.accumulo.LazyAccumuloRow), Properties (java.util.Properties), LazyStruct (org.apache.hadoop.hive.serde2.lazy.LazyStruct), AccumuloHiveRow (org.apache.hadoop.hive.accumulo.AccumuloHiveRow), Test (org.junit.Test)
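
DelimitedAccumuloRowIdFactory is what turns the delimited row id "p1_p2_p3" into the three struct fields asserted above. A minimal conceptual sketch of the splitting it implies, under the "_" delimiter configured in the test (this is illustrative, not the factory's actual code):

public class CompositeRowIdSketch {
    public static void main(String[] args) {
        // Row id as stored in Accumulo, delimited by ACCUMULO_COMPOSITE_DELIMITER ("_")
        String rowId = "p1_p2_p3";

        // struct<col1:string,col2:string,col3:string> maps each piece to one field
        String[] structFields = rowId.split("_");

        for (int i = 0; i < structFields.length; i++) {
            // Prints col1=p1, col2=p2, col3=p3
            System.out.println("col" + (i + 1) + "=" + structFields[i]);
        }
    }
}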

Aggregations

AccumuloHiveRow (org.apache.hadoop.hive.accumulo.AccumuloHiveRow): 12 usages
Test (org.junit.Test): 12 usages
LazyString (org.apache.hadoop.hive.serde2.lazy.LazyString): 7 usages
Properties (java.util.Properties): 6 usages
Configuration (org.apache.hadoop.conf.Configuration): 6 usages
Path (org.apache.hadoop.fs.Path): 6 usages
LazyAccumuloRow (org.apache.hadoop.hive.accumulo.LazyAccumuloRow): 6 usages
Text (org.apache.hadoop.io.Text): 6 usages
InputSplit (org.apache.hadoop.mapred.InputSplit): 6 usages
Mutation (org.apache.accumulo.core.data.Mutation): 4 usages
BatchWriter (org.apache.accumulo.core.client.BatchWriter): 3 usages
BatchWriterConfig (org.apache.accumulo.core.client.BatchWriterConfig): 3 usages
Value (org.apache.accumulo.core.data.Value): 3 usages
Authorizations (org.apache.accumulo.core.security.Authorizations): 3 usages
ByteArrayRef (org.apache.hadoop.hive.serde2.lazy.ByteArrayRef): 3 usages
ByteArrayOutputStream (java.io.ByteArrayOutputStream): 2 usages
Connector (org.apache.accumulo.core.client.Connector): 2 usages
MockInstance (org.apache.accumulo.core.client.mock.MockInstance): 2 usages
PasswordToken (org.apache.accumulo.core.client.security.tokens.PasswordToken): 2 usages
Key (org.apache.accumulo.core.data.Key): 2 usages