Search in sources :

Example 6 with LazyMapObjectInspector

use of org.apache.hadoop.hive.serde2.lazy.objectinspector.LazyMapObjectInspector in project hive by apache.

the class TestLazyHBaseCellMap method testInitColumnPrefix.

/**
 * Checks that a {@link LazyHBaseCellMap} initialized with a qualifier prefix can be
 * queried using only the part of each column qualifier that follows the prefix.
 *
 * @throws Exception if any of the setup or lookup calls fails
 */
@Test
public void testInitColumnPrefix() throws Exception {
    // Inspector for map<string,string>: separator bytes 1 and 2, "\N" as the null sequence
    Text nullMarker = new Text("\\N");
    byte[] separators = new byte[] { (byte) 1, (byte) 2 };
    ObjectInspector mapInspector = LazyFactory.createLazyObjectInspector(TypeInfoUtils.getTypeInfosFromTypeString("map<string,string>").get(0), separators, 0, nullMarker, false, (byte) 0);
    LazyHBaseCellMap cellMap = new LazyHBaseCellMap((LazyMapObjectInspector) mapInspector);

    // Two cells in the same family whose qualifiers are QUAL_PREFIX followed by "1" and "2"
    final String firstKey = "1";
    final String secondKey = "2";
    Cell[] prefixedCells = new KeyValue[] {
        new KeyValue(TEST_ROW, COLUMN_FAMILY, Bytes.toBytes(QUAL_PREFIX + firstKey), Bytes.toBytes("cfacol1")),
        new KeyValue(TEST_ROW, COLUMN_FAMILY, Bytes.toBytes(QUAL_PREFIX + secondKey), Bytes.toBytes("cfacol2"))
    };
    Result result = Result.create(prefixedCells);

    // Two "false" entries: neither flag requests binary storage
    List<Boolean> binaryStorageFlags = new ArrayList<>();
    binaryStorageFlags.add(Boolean.FALSE);
    binaryStorageFlags.add(Boolean.FALSE);

    cellMap.init(result, COLUMN_FAMILY, binaryStorageFlags, Bytes.toBytes(QUAL_PREFIX), true);

    // Both entries must be reachable through the un-prefixed key
    for (String key : new String[] { firstKey, secondKey }) {
        assertNotNull(cellMap.getMapValueElement(new Text(key)));
    }
}
Also used : LazyMapObjectInspector(org.apache.hadoop.hive.serde2.lazy.objectinspector.LazyMapObjectInspector) ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) KeyValue(org.apache.hadoop.hbase.KeyValue) ArrayList(java.util.ArrayList) Text(org.apache.hadoop.io.Text) Cell(org.apache.hadoop.hbase.Cell) Result(org.apache.hadoop.hbase.client.Result) Test(org.junit.Test)

Example 7 with LazyMapObjectInspector

use of org.apache.hadoop.hive.serde2.lazy.objectinspector.LazyMapObjectInspector in project hive by apache.

the class TestLazyHBaseObject method testLazyHBaseCellMap2.

/**
 * Test the LazyMap class with String-to-String.
 *
 * <p>Builds an HBase {@link Result} holding cells from three column families
 * ("cfa", "cfb", "cfc"), initializes a {@link LazyHBaseCellMap} on family "cfb" only,
 * and checks the values returned for its qualifiers. As asserted below, the qualifier
 * stored with an empty value ("-1") and a qualifier that was never stored ("-") both
 * yield {@code null}, and the JSON rendering contains only the three non-empty entries.
 *
 * @throws SerDeException declared by the test signature; propagated from the SerDe calls
 */
public void testLazyHBaseCellMap2() throws SerDeException {
    // Map of String to String
    Text nullSequence = new Text("\\N");
    ObjectInspector oi = LazyFactory.createLazyObjectInspector(TypeInfoUtils.getTypeInfosFromTypeString("map<string,string>").get(0), new byte[] { (byte) '#', (byte) '\t' }, 0, nullSequence, false, (byte) 0);
    LazyHBaseCellMap b = new LazyHBaseCellMap((LazyMapObjectInspector) oi);

    // Initialize a result; every cell shares the same row key
    byte[] row = Bytes.toBytes("test-row");
    List<Cell> kvs = new ArrayList<Cell>();
    kvs.add(new KeyValue(row, Bytes.toBytes("cfa"), Bytes.toBytes("col1"), Bytes.toBytes("cfacol1")));
    kvs.add(new KeyValue(row, Bytes.toBytes("cfa"), Bytes.toBytes("col2"), Bytes.toBytes("cfacol2")));
    kvs.add(new KeyValue(row, Bytes.toBytes("cfb"), Bytes.toBytes("2"), Bytes.toBytes("d\tf")));
    kvs.add(new KeyValue(row, Bytes.toBytes("cfb"), Bytes.toBytes("-1"), Bytes.toBytes("")));
    kvs.add(new KeyValue(row, Bytes.toBytes("cfb"), Bytes.toBytes("0"), Bytes.toBytes("0")));
    kvs.add(new KeyValue(row, Bytes.toBytes("cfb"), Bytes.toBytes("8"), Bytes.toBytes("abc")));
    kvs.add(new KeyValue(row, Bytes.toBytes("cfc"), Bytes.toBytes("col3"), Bytes.toBytes("cfccol3")));
    Result r = Result.create(kvs);

    List<Boolean> mapBinaryStorage = new ArrayList<Boolean>();
    mapBinaryStorage.add(false);
    mapBinaryStorage.add(false);

    // Use Bytes.toBytes rather than String.getBytes(): the latter depends on the platform
    // default charset, while the cells above were all encoded through Bytes.toBytes.
    b.init(r, Bytes.toBytes("cfb"), mapBinaryStorage);

    assertEquals(new Text("d\tf"), ((LazyString) b.getMapValueElement(new Text("2"))).getWritableObject());
    // Qualifier "-1" was stored with an empty value
    assertNull(b.getMapValueElement(new Text("-1")));
    assertEquals(new Text("0"), ((LazyString) b.getMapValueElement(new Text("0"))).getWritableObject());
    assertEquals(new Text("abc"), ((LazyString) b.getMapValueElement(new Text("8"))).getWritableObject());
    // Qualifier "-" was never stored
    assertNull(b.getMapValueElement(new Text("-")));
    assertEquals("{'0':'0','2':'d\\tf','8':'abc'}".replace('\'', '\"'), SerDeUtils.getJSONString(b, oi));
}
Also used : LazySimpleStructObjectInspector(org.apache.hadoop.hive.serde2.lazy.objectinspector.LazySimpleStructObjectInspector) ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) LazyMapObjectInspector(org.apache.hadoop.hive.serde2.lazy.objectinspector.LazyMapObjectInspector) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector) KeyValue(org.apache.hadoop.hbase.KeyValue) ArrayList(java.util.ArrayList) Text(org.apache.hadoop.io.Text) Cell(org.apache.hadoop.hbase.Cell) Result(org.apache.hadoop.hbase.client.Result)

Example 8 with LazyMapObjectInspector

use of org.apache.hadoop.hive.serde2.lazy.objectinspector.LazyMapObjectInspector in project hive by apache.

the class TestHiveAccumuloTableOutputFormat method testWriteMap.

/**
 * Writes a struct containing a string row id and a string-to-string map through
 * {@link HiveAccumuloTableOutputFormat} into a mock Accumulo instance, then scans the
 * table and checks that each map entry was stored as its own column under family "cf".
 *
 * @throws Exception if setting up the mock instance, serializing, writing or scanning fails
 */
@Test
public void testWriteMap() throws Exception {
    // The test method name doubles as both the mock instance name and the table name
    String table = test.getMethodName();
    Instance inst = new MockInstance(table);
    Connector conn = inst.getConnector("root", new PasswordToken(""));
    HiveAccumuloTableOutputFormat outputFormat = new HiveAccumuloTableOutputFormat();
    conn.tableOperations().create(table);

    JobConf conf = new JobConf();
    conf.set(AccumuloConnectionParameters.INSTANCE_NAME, inst.getInstanceName());
    conf.set(AccumuloConnectionParameters.USER_NAME, "root");
    conf.set(AccumuloConnectionParameters.USER_PASS, "");
    conf.setBoolean(AccumuloConnectionParameters.USE_MOCK_INSTANCE, true);
    conf.set(AccumuloConnectionParameters.TABLE_NAME, table);

    FileSystem local = FileSystem.getLocal(conf);
    outputFormat.checkOutputSpecs(local, conf);
    RecordWriter<Text, Mutation> recordWriter = outputFormat.getRecordWriter(local, conf, null, null);

    List<String> names = Arrays.asList("row", "col1");
    List<TypeInfo> types = Arrays.<TypeInfo>asList(TypeInfoFactory.stringTypeInfo, TypeInfoFactory.stringTypeInfo);
    Properties tableProperties = new Properties();
    // ":rowID" maps the first column to the row id; "cf:*" maps the second to family "cf"
    tableProperties.setProperty(AccumuloSerDeParameters.COLUMN_MAPPINGS, ":rowID,cf:*");
    tableProperties.setProperty(serdeConstants.FIELD_DELIM, " ");
    tableProperties.setProperty(serdeConstants.LIST_COLUMNS, Joiner.on(',').join(names));
    tableProperties.setProperty(serdeConstants.LIST_COLUMN_TYPES, Joiner.on(',').join(types));
    AccumuloSerDeParameters accumuloSerDeParams = new AccumuloSerDeParameters(new Configuration(), tableProperties, AccumuloSerDe.class.getSimpleName());
    LazySerDeParameters serDeParams = accumuloSerDeParams.getSerDeParameters();
    AccumuloRowSerializer serializer = new AccumuloRowSerializer(0, serDeParams, accumuloSerDeParams.getColumnMappings(), AccumuloSerDeParameters.DEFAULT_VISIBILITY_LABEL, accumuloSerDeParams.getRowIdFactory());

    // Inspectors: struct<row:string, data:map<string,string>> with ' ' between fields,
    // ',' between map entries and ':' between key and value
    TypeInfo stringTypeInfo = TypeInfoFactory.getPrimitiveTypeInfo(serdeConstants.STRING_TYPE_NAME);
    LazyStringObjectInspector stringOI = (LazyStringObjectInspector) LazyFactory.createLazyObjectInspector(stringTypeInfo, new byte[] { 0 }, 0, serDeParams.getNullSequence(), serDeParams.isEscaped(), serDeParams.getEscapeChar());
    LazyMapObjectInspector mapOI = LazyObjectInspectorFactory.getLazySimpleMapObjectInspector(stringOI, stringOI, (byte) ',', (byte) ':', serDeParams.getNullSequence(), serDeParams.isEscaped(), serDeParams.getEscapeChar());
    LazySimpleStructObjectInspector structOI = (LazySimpleStructObjectInspector) LazyObjectInspectorFactory.getLazySimpleStructObjectInspector(Arrays.asList("row", "data"), Arrays.asList(stringOI, mapOI), (byte) ' ', serDeParams.getNullSequence(), serDeParams.isLastColumnTakesRest(), serDeParams.isEscaped(), serDeParams.getEscapeChar());
    LazyStruct struct = (LazyStruct) LazyFactory.createLazyObject(structOI);

    ByteArrayRef bytes = new ByteArrayRef();
    // Encode with an explicit charset instead of the platform default
    bytes.setData("row cq1:value1,cq2:value2".getBytes(java.nio.charset.StandardCharsets.UTF_8));
    struct.init(bytes, 0, bytes.getData().length);

    // Serialize the struct into a mutation
    Mutation m = serializer.serialize(struct, structOI);
    // Write the mutation
    recordWriter.write(new Text(table), m);
    // Close the writer
    recordWriter.close(null);

    Iterator<Entry<Key, Value>> iter = conn.createScanner(table, new Authorizations()).iterator();
    Assert.assertTrue("Iterator did not have an element as expected", iter.hasNext());
    assertMapEntry(iter.next(), "cq1", "value1");
    Assert.assertTrue("Iterator did not have an element as expected", iter.hasNext());
    assertMapEntry(iter.next(), "cq2", "value2");
    Assert.assertFalse("Iterator unexpectedly had more data", iter.hasNext());
}

/**
 * Asserts that a scanned entry has row "row", family "cf", the default visibility label,
 * and the given column qualifier and UTF-8 decoded value.
 */
private static void assertMapEntry(Entry<Key, Value> entry, String expectedQualifier, String expectedValue) {
    Key k = entry.getKey();
    Value v = entry.getValue();
    Assert.assertEquals("row", k.getRow().toString());
    Assert.assertEquals("cf", k.getColumnFamily().toString());
    Assert.assertEquals(expectedQualifier, k.getColumnQualifier().toString());
    Assert.assertEquals(AccumuloSerDeParameters.DEFAULT_VISIBILITY_LABEL, k.getColumnVisibilityParsed());
    // Decode with an explicit charset instead of the platform default
    Assert.assertEquals(expectedValue, new String(v.get(), java.nio.charset.StandardCharsets.UTF_8));
}
Also used : Connector(org.apache.accumulo.core.client.Connector) Configuration(org.apache.hadoop.conf.Configuration) MockInstance(org.apache.accumulo.core.client.mock.MockInstance) Instance(org.apache.accumulo.core.client.Instance) LazySerDeParameters(org.apache.hadoop.hive.serde2.lazy.LazySerDeParameters) Properties(java.util.Properties) PasswordToken(org.apache.accumulo.core.client.security.tokens.PasswordToken) Entry(java.util.Map.Entry) MockInstance(org.apache.accumulo.core.client.mock.MockInstance) FileSystem(org.apache.hadoop.fs.FileSystem) JobConf(org.apache.hadoop.mapred.JobConf) AccumuloRowSerializer(org.apache.hadoop.hive.accumulo.serde.AccumuloRowSerializer) LazyStruct(org.apache.hadoop.hive.serde2.lazy.LazyStruct) Authorizations(org.apache.accumulo.core.security.Authorizations) LazyStringObjectInspector(org.apache.hadoop.hive.serde2.lazy.objectinspector.primitive.LazyStringObjectInspector) LazySimpleStructObjectInspector(org.apache.hadoop.hive.serde2.lazy.objectinspector.LazySimpleStructObjectInspector) Text(org.apache.hadoop.io.Text) LazyMapObjectInspector(org.apache.hadoop.hive.serde2.lazy.objectinspector.LazyMapObjectInspector) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) AccumuloSerDe(org.apache.hadoop.hive.accumulo.serde.AccumuloSerDe) AccumuloSerDeParameters(org.apache.hadoop.hive.accumulo.serde.AccumuloSerDeParameters) ByteArrayRef(org.apache.hadoop.hive.serde2.lazy.ByteArrayRef) Value(org.apache.accumulo.core.data.Value) Mutation(org.apache.accumulo.core.data.Mutation) Key(org.apache.accumulo.core.data.Key) Test(org.junit.Test)

Example 9 with LazyMapObjectInspector

use of org.apache.hadoop.hive.serde2.lazy.objectinspector.LazyMapObjectInspector in project hive by apache.

the class LazyHBaseCellMapTest method testInitColumnPrefix.

/**
 * Initializes a {@link LazyHBaseCellMap} from two cells whose qualifiers start with
 * {@code QUAL_PREFIX} and verifies both can be looked up by the suffix alone.
 *
 * @throws Exception if any of the setup or lookup calls fails
 */
public void testInitColumnPrefix() throws Exception {
    final String suffixOne = "1";
    final String suffixTwo = "2";

    // map<string,string> inspector using separator bytes 1 and 2 and "\N" for nulls
    Text nullText = new Text("\\N");
    ObjectInspector inspector = LazyFactory.createLazyObjectInspector(
        TypeInfoUtils.getTypeInfosFromTypeString("map<string,string>").get(0),
        new byte[] { (byte) 1, (byte) 2 },
        0,
        nullText,
        false,
        (byte) 0);
    LazyHBaseCellMap lazyMap = new LazyHBaseCellMap((LazyMapObjectInspector) inspector);

    // Build the HBase result the map is read from
    byte[] qualifierOne = Bytes.toBytes(QUAL_PREFIX + suffixOne);
    byte[] qualifierTwo = Bytes.toBytes(QUAL_PREFIX + suffixTwo);
    Cell[] rowCells = new KeyValue[2];
    rowCells[0] = new KeyValue(TEST_ROW, COLUMN_FAMILY, qualifierOne, Bytes.toBytes("cfacol1"));
    rowCells[1] = new KeyValue(TEST_ROW, COLUMN_FAMILY, qualifierTwo, Bytes.toBytes("cfacol2"));
    Result hbaseResult = Result.create(rowCells);

    // Neither of the two flags requests binary storage
    List<Boolean> storageFlags = new ArrayList<>();
    storageFlags.add(false);
    storageFlags.add(false);

    lazyMap.init(hbaseResult, COLUMN_FAMILY, storageFlags, Bytes.toBytes(QUAL_PREFIX), true);

    Object valueOne = lazyMap.getMapValueElement(new Text(suffixOne));
    assertNotNull(valueOne);
    Object valueTwo = lazyMap.getMapValueElement(new Text(suffixTwo));
    assertNotNull(valueTwo);
}
Also used : LazyMapObjectInspector(org.apache.hadoop.hive.serde2.lazy.objectinspector.LazyMapObjectInspector) ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) KeyValue(org.apache.hadoop.hbase.KeyValue) ArrayList(java.util.ArrayList) Text(org.apache.hadoop.io.Text) Cell(org.apache.hadoop.hbase.Cell) Result(org.apache.hadoop.hbase.client.Result)

Example 10 with LazyMapObjectInspector

use of org.apache.hadoop.hive.serde2.lazy.objectinspector.LazyMapObjectInspector in project hive by apache.

the class LazyAccumuloMap method parse.

/**
 * Rebuilds {@code cachedMap} from the tuples of {@code sourceRow}.
 *
 * <p>Only tuples whose column family equals the mapped family and whose qualifier starts
 * with the mapped qualifier prefix are kept. The prefix is stripped from the qualifier to
 * form the map key; the tuple's value bytes form the map value, or a null lazy object when
 * the bytes are absent or match the inspector's null sequence. Marks the object as parsed
 * when done.
 */
protected void parse() {
    // Reuse the existing map instance when there is one
    if (this.cachedMap != null) {
        this.cachedMap.clear();
    } else {
        this.cachedMap = new LinkedHashMap<Object, Object>();
    }

    LazyMapObjectInspector mapInspector = getInspector();
    Text mappedFamily = new Text(columnMapping.getColumnFamily());

    for (ColumnTuple column : sourceRow.getTuples()) {
        String qualifier = column.getCq().toString();
        boolean wanted = mappedFamily.equals(column.getCf()) && qualifier.startsWith(columnMapping.getColumnQualifierPrefix());
        if (!wanted) {
            // A column family or qualifier we don't want to include in the map
            continue;
        }

        // The cq prefix is appended when serializing the column, so strip it
        // again when reading the column back from Accumulo
        int prefixLength = columnMapping.getColumnQualifierPrefix().length();
        qualifier = qualifier.substring(prefixLength);

        // Keys are always primitive, respect the binary
        PrimitiveObjectInspector keyInspector = (PrimitiveObjectInspector) mapInspector.getMapKeyObjectInspector();
        LazyPrimitive<? extends ObjectInspector, ? extends Writable> mapKey = LazyFactory.createLazyPrimitiveClass(keyInspector, ColumnEncoding.BINARY == columnMapping.getKeyEncoding());
        ByteArrayRef keyBytes = new ByteArrayRef();
        keyBytes.setData(qualifier.getBytes(Charsets.UTF_8));
        mapKey.init(keyBytes, 0, keyBytes.getData().length);

        // Value can be anything, use the obj inspector and respect binary
        LazyObject<?> mapValue = LazyFactory.createLazyObject(mapInspector.getMapValueObjectInspector(), ColumnEncoding.BINARY == columnMapping.getValueEncoding());
        byte[] rawValue = column.getValue();
        if (rawValue != null && !isNull(oi.getNullSequence(), rawValue, 0, rawValue.length)) {
            ByteArrayRef valueBytes = new ByteArrayRef();
            valueBytes.setData(rawValue);
            mapValue.init(valueBytes, 0, valueBytes.getData().length);
        } else {
            mapValue.setNull();
        }

        cachedMap.put(mapKey, mapValue);
    }

    setParsed(true);
}
Also used : ByteArrayRef(org.apache.hadoop.hive.serde2.lazy.ByteArrayRef) LazyObject(org.apache.hadoop.hive.serde2.lazy.LazyObject) LazyMapObjectInspector(org.apache.hadoop.hive.serde2.lazy.objectinspector.LazyMapObjectInspector) Text(org.apache.hadoop.io.Text) ColumnTuple(org.apache.hadoop.hive.accumulo.AccumuloHiveRow.ColumnTuple)

Aggregations

LazyMapObjectInspector (org.apache.hadoop.hive.serde2.lazy.objectinspector.LazyMapObjectInspector)15 ObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector)11 Text (org.apache.hadoop.io.Text)11 Test (org.junit.Test)9 LazySimpleStructObjectInspector (org.apache.hadoop.hive.serde2.lazy.objectinspector.LazySimpleStructObjectInspector)7 ArrayList (java.util.ArrayList)6 Cell (org.apache.hadoop.hbase.Cell)5 KeyValue (org.apache.hadoop.hbase.KeyValue)5 Result (org.apache.hadoop.hbase.client.Result)5 ByteArrayRef (org.apache.hadoop.hive.serde2.lazy.ByteArrayRef)5 TypeInfo (org.apache.hadoop.hive.serde2.typeinfo.TypeInfo)5 IntWritable (org.apache.hadoop.io.IntWritable)5 Properties (java.util.Properties)4 Configuration (org.apache.hadoop.conf.Configuration)4 HiveAccumuloMapColumnMapping (org.apache.hadoop.hive.accumulo.columns.HiveAccumuloMapColumnMapping)4 LazySerDeParameters (org.apache.hadoop.hive.serde2.lazy.LazySerDeParameters)4 LazyStringObjectInspector (org.apache.hadoop.hive.serde2.lazy.objectinspector.primitive.LazyStringObjectInspector)4 Mutation (org.apache.accumulo.core.data.Mutation)3 LazyStruct (org.apache.hadoop.hive.serde2.lazy.LazyStruct)3 StructObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector)3