Search in sources :

Example 1 with LazyString

use of org.apache.hadoop.hive.serde2.lazy.LazyString in project presto by prestodb.

the class TestLazyMap method lazyString.

/**
 * Creates a {@link LazyString} bound to {@code LAZY_STRING_OBJECT_INSPECTOR} and initializes it
 * over the byte reference produced by {@code newByteArrayRef(string)}, starting at offset 0.
 *
 * NOTE(review): the length passed to {@code init} is {@code string.length()}, a char count;
 * this presumably relies on newByteArrayRef encoding one byte per char - confirm.
 */
private static LazyString lazyString(String string) {
    final LazyString result = new LazyString(LAZY_STRING_OBJECT_INSPECTOR);
    result.init(newByteArrayRef(string), 0, string.length());
    return result;
}
Also used : LazyString(org.apache.hadoop.hive.serde2.lazy.LazyString)

Example 2 with LazyString

use of org.apache.hadoop.hive.serde2.lazy.LazyString in project hive by apache.

the class AccumuloRowSerializer method serializeRowId.

/**
 * Serializes the given Accumulo row id into a byte array.
 *
 * <p>The {@code output} buffer is reset first. When the row id field's object inspector is not
 * primitive while the mapping's declared column type is primitive, the row id is written as its
 * JSON string form; otherwise serialization is delegated to {@code getSerializedValue}.
 *
 * @param rowId the row id value; must not be null
 * @param rowIdField the struct field that supplies the row id's object inspector
 * @param rowIdMapping the column mapping that supplies the row id's column type
 * @return the contents of the {@code output} buffer after the row id has been written
 * @throws IOException if {@code rowId} is null
 */
protected byte[] serializeRowId(Object rowId, StructField rowIdField, ColumnMapping rowIdMapping) throws IOException {
    if (null == rowId) {
        throw new IOException("Accumulo rowId cannot be NULL");
    }
    // Start from an empty buffer before writing anything
    output.reset();
    final ObjectInspector fieldInspector = rowIdField.getFieldObjectInspector();
    final TypeInfo mappedType = TypeInfoUtils.getTypeInfoFromTypeString(rowIdMapping.getColumnType());
    final boolean fieldIsPrimitive = fieldInspector.getCategory().equals(ObjectInspector.Category.PRIMITIVE);
    if (!fieldIsPrimitive && mappedType.getCategory() == ObjectInspector.Category.PRIMITIVE) {
        // Non-primitive value mapped to a primitive column: write its JSON text through the
        // string writer (the escaped algorithm used for LazyString)
        writeString(output, SerDeUtils.getJSONString(rowId, fieldInspector), PrimitiveObjectInspectorFactory.javaStringObjectInspector);
    } else {
        // Delegate to getSerializedValue, which (per the original author's note) writes primitive
        // values as either a variable length UTF8 string or fixed width bytes in binary format
        getSerializedValue(fieldInspector, rowId, output, rowIdMapping);
    }
    return output.toByteArray();
}
Also used : ListObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector) PrimitiveObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector) ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) MapObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector) IOException(java.io.IOException) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo)

Example 3 with LazyString

use of org.apache.hadoop.hive.serde2.lazy.LazyString in project hive by apache.

the class TestAccumuloSerDe method deserialization.

/**
 * Deserializes a row that has a row id plus values for cf:f1 and cf:f2, and checks that the
 * result is a LazyAccumuloRow whose first three fields are LazyString instances holding
 * "r1", "v1" and "v2" respectively.
 */
@Test
public void deserialization() throws Exception {
    Properties properties = new Properties();
    Configuration conf = new Configuration();
    properties.setProperty(AccumuloSerDeParameters.COLUMN_MAPPINGS, ":rowID,cf:f1,cf:f2,cf:f3");
    properties.setProperty(serdeConstants.LIST_COLUMNS, "blah,field2,field3,field4");
    serde.initialize(conf, properties, null);
    AccumuloHiveRow row = new AccumuloHiveRow();
    row.setRowId("r1");
    row.add("cf", "f1", "v1".getBytes());
    row.add("cf", "f2", "v2".getBytes());
    Object obj = serde.deserialize(row);
    // Explicit null check, consistent with the other serde tests
    assertNotNull(obj);
    assertTrue(obj instanceof LazyAccumuloRow);
    LazyAccumuloRow lazyRow = (LazyAccumuloRow) obj;
    Object field0 = lazyRow.getField(0);
    assertNotNull(field0);
    assertTrue(field0 instanceof LazyString);
    // assertEquals takes (expected, actual); keeping that order makes failure messages accurate
    assertEquals("r1", field0.toString());
    Object field1 = lazyRow.getField(1);
    assertNotNull(field1);
    assertTrue("Expected instance of LazyString but was " + field1.getClass(), field1 instanceof LazyString);
    assertEquals("v1", field1.toString());
    Object field2 = lazyRow.getField(2);
    assertNotNull(field2);
    assertTrue(field2 instanceof LazyString);
    assertEquals("v2", field2.toString());
}
Also used : LazyString(org.apache.hadoop.hive.serde2.lazy.LazyString) Configuration(org.apache.hadoop.conf.Configuration) LazyAccumuloRow(org.apache.hadoop.hive.accumulo.LazyAccumuloRow) Properties(java.util.Properties) AccumuloHiveRow(org.apache.hadoop.hive.accumulo.AccumuloHiveRow) Test(org.junit.Test)

Example 4 with LazyString

use of org.apache.hadoop.hive.serde2.lazy.LazyString in project hive by apache.

the class TestAccumuloSerDe method testMapSerialization.

/**
 * Deserializes a row whose cf:vals column holds "k1=v1:k2=v2:k3=v3" typed as
 * map&lt;string,string&gt;, then checks that the row id comes back as a LazyString and that the
 * map column comes back as a LazyMap of three LazyString pairs, each key kN paired with vN.
 */
@Test
public void testMapSerialization() throws Exception {
    Properties properties = new Properties();
    Configuration conf = new Configuration();
    properties.setProperty(AccumuloSerDeParameters.COLUMN_MAPPINGS, ":rowID,cf:vals");
    properties.setProperty(serdeConstants.LIST_COLUMNS, "row,values");
    properties.setProperty(serdeConstants.LIST_COLUMN_TYPES, "string,map<string,string>");
    properties.setProperty(serdeConstants.COLLECTION_DELIM, ":");
    properties.setProperty(serdeConstants.MAPKEY_DELIM, "=");
    // Separators used to build the raw value; they mirror the delimiters configured above
    char collectionSeparator = ':';
    char kvSeparator = '=';
    serde.initialize(conf, properties, null);

    AccumuloHiveRow row = new AccumuloHiveRow();
    row.setRowId("r1");
    String encodedMap = "k1" + kvSeparator + "v1" + collectionSeparator + "k2" + kvSeparator + "v2" + collectionSeparator + "k3" + kvSeparator + "v3";
    row.add("cf", "vals", encodedMap.getBytes());

    Object deserialized = serde.deserialize(row);
    assertNotNull(deserialized);
    assertTrue(deserialized instanceof LazyAccumuloRow);
    LazyAccumuloRow lazyRow = (LazyAccumuloRow) deserialized;

    // Field 0: the row id
    Object rowIdField = lazyRow.getField(0);
    assertNotNull(rowIdField);
    assertTrue(rowIdField instanceof LazyString);
    assertEquals(row.getRowId(), ((LazyString) rowIdField).getWritableObject().toString());

    // Field 1: the map column
    Object mapField = lazyRow.getField(1);
    assertNotNull(mapField);
    assertTrue(mapField instanceof LazyMap);
    LazyMap lazyMap = (LazyMap) mapField;
    Map<Object, Object> entries = lazyMap.getMap();
    assertEquals(3, lazyMap.getMapSize());

    // Every key must be seen exactly once; keys are removed as they are encountered
    Set<String> remainingKeys = new HashSet<String>();
    remainingKeys.add("k1");
    remainingKeys.add("k2");
    remainingKeys.add("k3");
    for (Entry<Object, Object> pair : entries.entrySet()) {
        assertNotNull(pair.getKey());
        assertTrue(pair.getKey() instanceof LazyString);
        LazyString lazyKey = (LazyString) pair.getKey();
        assertNotNull(pair.getValue());
        assertTrue(pair.getValue() instanceof LazyString);
        LazyString lazyValue = (LazyString) pair.getValue();
        String keyText = lazyKey.getWritableObject().toString();
        String valueText = lazyValue.getWritableObject().toString();
        assertTrue(remainingKeys.remove(keyText));
        // The value must be "v" followed by the same digit that follows "k" in the key
        assertEquals(2, valueText.length());
        assertTrue(valueText.startsWith("v"));
        String keySuffix = String.valueOf(keyText.charAt(1));
        assertTrue(valueText.endsWith(keySuffix));
    }
    assertTrue("Did not find expected keys: " + remainingKeys, remainingKeys.isEmpty());
}
Also used : Configuration(org.apache.hadoop.conf.Configuration) LazyAccumuloRow(org.apache.hadoop.hive.accumulo.LazyAccumuloRow) LazyMap(org.apache.hadoop.hive.serde2.lazy.LazyMap) LazyString(org.apache.hadoop.hive.serde2.lazy.LazyString) Properties(java.util.Properties) AccumuloHiveRow(org.apache.hadoop.hive.accumulo.AccumuloHiveRow) HashSet(java.util.HashSet) Test(org.junit.Test)

Example 5 with LazyString

use of org.apache.hadoop.hive.serde2.lazy.LazyString in project hive by apache.

the class TestAccumuloSerDe method testArraySerialization.

/**
 * Deserializes a row whose cf:vals column holds "value1:value2:value3" typed as
 * array&lt;string&gt;, then checks that the row id comes back as a LazyString and that the array
 * column comes back as a LazyArray of three LazyString elements in the original order.
 */
@Test
public void testArraySerialization() throws Exception {
    Properties properties = new Properties();
    Configuration conf = new Configuration();
    properties.setProperty(AccumuloSerDeParameters.COLUMN_MAPPINGS, ":rowID,cf:vals");
    properties.setProperty(serdeConstants.LIST_COLUMNS, "row,values");
    properties.setProperty(serdeConstants.LIST_COLUMN_TYPES, "string,array<string>");
    properties.setProperty(serdeConstants.COLLECTION_DELIM, ":");
    // Separator used to build the raw value; it mirrors the collection delimiter configured above
    char separator = ':';
    serde.initialize(conf, properties, null);

    AccumuloHiveRow row = new AccumuloHiveRow();
    row.setRowId("r1");
    String encodedArray = "value1" + separator + "value2" + separator + "value3";
    row.add("cf", "vals", encodedArray.getBytes());

    Object deserialized = serde.deserialize(row);
    assertNotNull(deserialized);
    assertTrue(deserialized instanceof LazyAccumuloRow);
    LazyAccumuloRow lazyRow = (LazyAccumuloRow) deserialized;

    // Field 0: the row id
    Object rowIdField = lazyRow.getField(0);
    assertNotNull(rowIdField);
    assertTrue(rowIdField instanceof LazyString);
    assertEquals(row.getRowId(), ((LazyString) rowIdField).getWritableObject().toString());

    // Field 1: the array column
    Object arrayField = lazyRow.getField(1);
    assertNotNull(arrayField);
    assertTrue(arrayField instanceof LazyArray);
    LazyArray lazyArray = (LazyArray) arrayField;
    List<Object> elements = lazyArray.getList();
    assertEquals(3, elements.size());
    for (int index = 0; index < 3; index++) {
        Object element = elements.get(index);
        assertNotNull(element);
        assertTrue(element instanceof LazyString);
        // Elements are numbered from 1 in the encoded value
        String expectedText = "value" + (index + 1);
        assertEquals(expectedText, ((LazyString) element).getWritableObject().toString());
    }
}
Also used : LazyString(org.apache.hadoop.hive.serde2.lazy.LazyString) Configuration(org.apache.hadoop.conf.Configuration) LazyAccumuloRow(org.apache.hadoop.hive.accumulo.LazyAccumuloRow) LazyArray(org.apache.hadoop.hive.serde2.lazy.LazyArray) Properties(java.util.Properties) AccumuloHiveRow(org.apache.hadoop.hive.accumulo.AccumuloHiveRow) Test(org.junit.Test)

Aggregations

Test (org.junit.Test)12 Text (org.apache.hadoop.io.Text)11 LazyString (org.apache.hadoop.hive.serde2.lazy.LazyString)10 Configuration (org.apache.hadoop.conf.Configuration)8 Properties (java.util.Properties)6 AccumuloHiveRow (org.apache.hadoop.hive.accumulo.AccumuloHiveRow)6 ObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector)6 StructObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector)6 LazyAccumuloRow (org.apache.hadoop.hive.accumulo.LazyAccumuloRow)4 ByteArrayOutputStream (java.io.ByteArrayOutputStream)3 ArrayList (java.util.ArrayList)3 Map (java.util.Map)3 Path (org.apache.hadoop.fs.Path)3 Date (org.apache.hadoop.hive.common.type.Date)3 ByteWritable (org.apache.hadoop.hive.serde2.io.ByteWritable)3 LazySimpleStructObjectInspector (org.apache.hadoop.hive.serde2.lazy.objectinspector.LazySimpleStructObjectInspector)3 ListObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector)3 MapObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector)3 PrimitiveObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector)3 DataOutputStream (java.io.DataOutputStream)2