Search in sources :

Example 16 with LazyFactory.createLazyObject

Use of org.apache.hadoop.hive.serde2.lazy.LazyFactory.createLazyObject in the Apache Hive project.

From the class MultiDelimitSerDe, method initialize:

/**
 * Initializes this SerDe from the table properties: reads the field delimiter,
 * resolves the collection and map-key separators, and builds the cached struct
 * object inspector together with the lazy struct created from it.
 *
 * @param conf configuration passed to the parent initializer and to the SerDe parameters
 * @param tbl  table properties from which the delimiters are read
 * @throws SerDeException if the field delimiter property is missing or empty
 */
@Override
public void initialize(Configuration conf, Properties tbl) throws SerDeException {
    super.initialize(conf, tbl);

    // Collect the SerDe parameters for this table
    serdeParams = new LazySerDeParameters(conf, tbl, getClass().getName());

    // The field delimiter is stored first and then validated; it must be non-empty
    final String delimiter = tbl.getProperty(serdeConstants.FIELD_DELIM);
    fieldDelimited = delimiter;
    if (delimiter == null || delimiter.isEmpty()) {
        throw new SerDeException("This table does not have serde property \"field.delim\"!");
    }

    // Resolve the collection separator and the map key separator, falling back to the defaults
    // TODO: use serdeConstants.COLLECTION_DELIM when the typo is fixed
    collSep = LazyUtils.getByte(
            tbl.getProperty(COLLECTION_DELIM),
            DEFAULT_SEPARATORS[1]);
    keySep = LazyUtils.getByte(
            tbl.getProperty(serdeConstants.MAPKEY_DELIM),
            DEFAULT_SEPARATORS[2]);
    serdeParams.setSeparator(1, collSep);
    serdeParams.setSeparator(2, keySep);

    // Build the struct ObjectInspector for the fields, then the lazy struct backed by it
    cachedObjectInspector = LazyFactory.createLazyStructInspector(
            serdeParams.getColumnNames(),
            serdeParams.getColumnTypes(),
            serdeParams.getSeparators(),
            serdeParams.getNullSequence(),
            serdeParams.isLastColumnTakesRest(),
            serdeParams.isEscaped(),
            serdeParams.getEscapeChar());
    cachedLazyStruct = (LazyStruct) LazyFactory.createLazyObject(cachedObjectInspector);

    // Column names and column types are expected to line up one-to-one
    final int columnCount = serdeParams.getColumnNames().size();
    assert columnCount == serdeParams.getColumnTypes().size();
    numColumns = columnCount;
}
Also used : LazySerDeParameters(org.apache.hadoop.hive.serde2.lazy.LazySerDeParameters)

Example 17 with LazyFactory.createLazyObject

Use of org.apache.hadoop.hive.serde2.lazy.LazyFactory.createLazyObject in the Apache Hive project.

From the class LazyAccumuloMap, method parse:

/**
 * Rebuilds the cached map from the tuples of the source row. Only tuples whose
 * column family equals the mapped family and whose column qualifier starts with
 * the mapped qualifier prefix are kept; the prefix is stripped from the map key.
 * Marks this object as parsed when done.
 */
protected void parse() {
    // Reuse the existing map when there is one, otherwise allocate it
    if (this.cachedMap != null) {
        this.cachedMap.clear();
    } else {
        this.cachedMap = new LinkedHashMap<Object, Object>();
    }

    final LazyMapObjectInspector mapInspector = getInspector();
    final Text family = new Text(columnMapping.getColumnFamily());

    for (ColumnTuple tuple : sourceRow.getTuples()) {
        final String qualifier = tuple.getCq().toString();

        // Skip any column family or qualifier we don't want to include in the map
        final boolean wanted = family.equals(tuple.getCf())
                && qualifier.startsWith(columnMapping.getColumnQualifierPrefix());
        if (!wanted) {
            continue;
        }

        // The cq prefix is appended when serializing the column,
        // so it is removed again when reading it back from Accumulo
        final String mapKey = qualifier.substring(columnMapping.getColumnQualifierPrefix().length());

        // Keys are always primitive; honor the binary key encoding
        final PrimitiveObjectInspector keyInspector =
                (PrimitiveObjectInspector) mapInspector.getMapKeyObjectInspector();
        final boolean binaryKey = ColumnEncoding.BINARY == columnMapping.getKeyEncoding();
        LazyPrimitive<? extends ObjectInspector, ? extends Writable> key =
                LazyFactory.createLazyPrimitiveClass(keyInspector, binaryKey);
        ByteArrayRef keyRef = new ByteArrayRef();
        keyRef.setData(mapKey.getBytes(Charsets.UTF_8));
        key.init(keyRef, 0, keyRef.getData().length);

        // Values can be of any type; use the value inspector and honor the binary value encoding
        final ObjectInspector valueInspector = mapInspector.getMapValueObjectInspector();
        final boolean binaryValue = ColumnEncoding.BINARY == columnMapping.getValueEncoding();
        LazyObject<?> value = LazyFactory.createLazyObject(valueInspector, binaryValue);

        final byte[] rawValue = tuple.getValue();
        if (rawValue != null && !isNull(oi.getNullSequence(), rawValue, 0, rawValue.length)) {
            ByteArrayRef valueRef = new ByteArrayRef();
            valueRef.setData(rawValue);
            value.init(valueRef, 0, valueRef.getData().length);
        } else {
            // Missing bytes or bytes matching the null sequence become a null value
            value.setNull();
        }

        cachedMap.put(key, value);
    }

    this.setParsed(true);
}
Also used : ByteArrayRef(org.apache.hadoop.hive.serde2.lazy.ByteArrayRef) LazyObject(org.apache.hadoop.hive.serde2.lazy.LazyObject) LazyMapObjectInspector(org.apache.hadoop.hive.serde2.lazy.objectinspector.LazyMapObjectInspector) Text(org.apache.hadoop.io.Text) ColumnTuple(org.apache.hadoop.hive.accumulo.AccumuloHiveRow.ColumnTuple)

Example 18 with LazyFactory.createLazyObject

Use of org.apache.hadoop.hive.serde2.lazy.LazyFactory.createLazyObject in the Apache Hive project.

From the class DelimitedAccumuloRowIdFactory, method createRowId:

/**
 * Creates the lazy object that represents the row id for the given inspector,
 * using binary storage when the row id mapping's encoding is {@code BINARY},
 * and logs the classes involved.
 *
 * @param inspector the object inspector describing the row id
 * @return the lazy object created by {@code LazyFactory} for the row id
 * @throws SerDeException declared by the overridden method's signature
 */
@Override
public LazyObjectBase createRowId(ObjectInspector inspector) throws SerDeException {
    final boolean binaryEncoded = ColumnEncoding.BINARY == rowIdMapping.getEncoding();
    final LazyObjectBase rowId = LazyFactory.createLazyObject(inspector, binaryEncoded);
    log.info("Created " + rowId.getClass() + " for rowId with inspector " + inspector.getClass());
    return rowId;
}
Also used : LazyObjectBase(org.apache.hadoop.hive.serde2.lazy.LazyObjectBase)

Example 19 with LazyFactory.createLazyObject

Use of org.apache.hadoop.hive.serde2.lazy.LazyFactory.createLazyObject in the Apache Hive project.

From the class TestAccumuloRowSerializer, method testVisibilityLabel:

/**
 * Serializes a four-column row through an {@code AccumuloRowSerializer} configured
 * with the column visibility "foo" and checks that the resulting mutation carries
 * the row id "row1" plus three column updates, each with the expected family,
 * qualifier, visibility and value (binary ints for cq1/cq2, a string for cq3).
 */
@Test
public void testVisibilityLabel() throws IOException, SerDeException {
    // Column layout: a string row id, two ints and a trailing string
    List<String> columns = Arrays.asList("row", "cq1", "cq2", "cq3");
    List<TypeInfo> types = Arrays.<TypeInfo>asList(
            TypeInfoFactory.stringTypeInfo,
            TypeInfoFactory.intTypeInfo,
            TypeInfoFactory.intTypeInfo,
            TypeInfoFactory.stringTypeInfo);
    List<String> typeNames = new ArrayList<String>(types.size());
    for (int i = 0; i < types.size(); i++) {
        typeNames.add(types.get(i).getTypeName());
    }

    // Table properties: cq1 and cq2 are mapped with the "#b" suffix, cq3 without it
    Properties tableProperties = new Properties();
    tableProperties.setProperty(serdeConstants.LIST_COLUMNS, Joiner.on(',').join(columns));
    tableProperties.setProperty(serdeConstants.LIST_COLUMN_TYPES, Joiner.on(',').join(typeNames));
    tableProperties.setProperty(serdeConstants.FIELD_DELIM, " ");
    tableProperties.setProperty(AccumuloSerDeParameters.COLUMN_MAPPINGS, ":rowid,cf:cq1#b,cf:cq2#b,cf:cq3");

    AccumuloSerDeParameters accumuloSerDeParams =
            new AccumuloSerDeParameters(new Configuration(), tableProperties, AccumuloSerDe.class.getSimpleName());
    LazySerDeParameters serDeParams = accumuloSerDeParams.getSerDeParameters();

    LazySimpleStructObjectInspector oi = (LazySimpleStructObjectInspector) LazyFactory.createLazyStructInspector(
            columns,
            types,
            serDeParams.getSeparators(),
            serDeParams.getNullSequence(),
            serDeParams.isLastColumnTakesRest(),
            serDeParams.isEscaped(),
            serDeParams.getEscapeChar());
    AccumuloRowSerializer serializer = new AccumuloRowSerializer(
            0,
            serDeParams,
            accumuloSerDeParams.getColumnMappings(),
            new ColumnVisibility("foo"),
            accumuloSerDeParams.getRowIdFactory());

    // Create the LazyStruct from the struct inspector and load the raw row "row1 10 20 value"
    LazyStruct row = (LazyStruct) LazyFactory.createLazyObject(oi);
    ByteArrayRef rowBytes = new ByteArrayRef();
    rowBytes.setData(new byte[] { 'r', 'o', 'w', '1', ' ', '1', '0', ' ', '2', '0', ' ', 'v', 'a', 'l', 'u', 'e' });
    row.init(rowBytes, 0, rowBytes.getData().length);

    Mutation mutation = (Mutation) serializer.serialize(row, oi);
    Assert.assertArrayEquals("row1".getBytes(), mutation.getRow());

    List<ColumnUpdate> updates = mutation.getUpdates();
    Assert.assertEquals(3, updates.size());

    ByteArrayOutputStream baos = new ByteArrayOutputStream();
    DataOutputStream out = new DataOutputStream(baos);

    // The first two updates hold ints; the expected bytes are what DataOutputStream.writeInt produces
    String[] intQualifiers = { "cq1", "cq2" };
    int[] intValues = { 10, 20 };
    for (int i = 0; i < intValues.length; i++) {
        ColumnUpdate intUpdate = updates.get(i);
        Assert.assertEquals("cf", new String(intUpdate.getColumnFamily()));
        Assert.assertEquals(intQualifiers[i], new String(intUpdate.getColumnQualifier()));
        Assert.assertEquals("foo", new String(intUpdate.getColumnVisibility()));
        baos.reset();
        out.writeInt(intValues[i]);
        Assert.assertArrayEquals(baos.toByteArray(), intUpdate.getValue());
    }

    // The last update holds the string column
    ColumnUpdate stringUpdate = updates.get(2);
    Assert.assertEquals("cf", new String(stringUpdate.getColumnFamily()));
    Assert.assertEquals("cq3", new String(stringUpdate.getColumnQualifier()));
    Assert.assertEquals("foo", new String(stringUpdate.getColumnVisibility()));
    Assert.assertEquals("value", new String(stringUpdate.getValue()));
}
Also used : LazySimpleStructObjectInspector(org.apache.hadoop.hive.serde2.lazy.objectinspector.LazySimpleStructObjectInspector) ColumnUpdate(org.apache.accumulo.core.data.ColumnUpdate) Configuration(org.apache.hadoop.conf.Configuration) LazySerDeParameters(org.apache.hadoop.hive.serde2.lazy.LazySerDeParameters) DataOutputStream(java.io.DataOutputStream) ArrayList(java.util.ArrayList) ByteArrayOutputStream(java.io.ByteArrayOutputStream) Properties(java.util.Properties) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) ByteArrayRef(org.apache.hadoop.hive.serde2.lazy.ByteArrayRef) ColumnVisibility(org.apache.accumulo.core.security.ColumnVisibility) Mutation(org.apache.accumulo.core.data.Mutation) LazyStruct(org.apache.hadoop.hive.serde2.lazy.LazyStruct) Test(org.junit.Test)

Example 20 with LazyFactory.createLazyObject

Use of org.apache.hadoop.hive.serde2.lazy.LazyFactory.createLazyObject in the Apache Hive project.

From the class LazyHBaseCellMap, method parse:

/**
 * Rebuilds the cached map from the family map that {@code result} returns for
 * {@code columnFamilyBytes}. Entries with null or empty values are skipped, as
 * are entries whose qualifier does not start with the configured prefix (when
 * one is set). Marks this object as parsed when done.
 */
private void parse() {
    // Reuse the existing map when there is one, otherwise allocate it
    if (cachedMap != null) {
        cachedMap.clear();
    } else {
        cachedMap = new LinkedHashMap<Object, Object>();
    }

    NavigableMap<byte[], byte[]> cells = result.getFamilyMap(columnFamilyBytes);
    if (cells == null) {
        // No family map: the map stays empty
        setParsed(true);
        return;
    }

    for (Entry<byte[], byte[]> cell : cells.entrySet()) {
        final byte[] cellValue = cell.getValue();
        // null values and values of zero length are not added to the cachedMap
        if (cellValue == null || cellValue.length == 0) {
            continue;
        }

        final byte[] qualifier = cell.getKey();
        // When a qualifier prefix is set, skip qualifiers that do not start with it
        if (qualPrefix != null && !Bytes.startsWith(qualifier, qualPrefix)) {
            continue;
        }

        LazyMapObjectInspector mapInspector = getInspector();

        // Keys are always primitive
        LazyPrimitive<? extends ObjectInspector, ? extends Writable> key = LazyFactory.createLazyPrimitiveClass(
                (PrimitiveObjectInspector) mapInspector.getMapKeyObjectInspector(),
                binaryStorage.get(0));
        ByteArrayRef keyRef = new ByteArrayRef();
        final boolean stripPrefix = qualPrefix != null && hideQualPrefix;
        // Either cut the prefix from hive's map key, or use the qualifier as-is for non-prefix maps
        keyRef.setData(stripPrefix
                ? Bytes.tail(qualifier, qualifier.length - qualPrefix.length)
                : qualifier);
        key.init(keyRef, 0, keyRef.getData().length);

        // Value
        LazyObject<?> value = LazyFactory.createLazyObject(
                mapInspector.getMapValueObjectInspector(),
                binaryStorage.get(1));
        if (!isNull(oi.getNullSequence(), cellValue, 0, cellValue.length)) {
            ByteArrayRef valueRef = new ByteArrayRef();
            valueRef.setData(cellValue);
            value.init(valueRef, 0, valueRef.getData().length);
        } else {
            // Bytes matching the null sequence become a null value
            value.setNull();
        }

        // Put the key/value objects into the map
        cachedMap.put(key.getObject(), value.getObject());
    }

    setParsed(true);
}
Also used : ByteArrayRef(org.apache.hadoop.hive.serde2.lazy.ByteArrayRef) LazyObject(org.apache.hadoop.hive.serde2.lazy.LazyObject) LazyMapObjectInspector(org.apache.hadoop.hive.serde2.lazy.objectinspector.LazyMapObjectInspector)

Aggregations

ByteArrayRef (org.apache.hadoop.hive.serde2.lazy.ByteArrayRef)19 LazySimpleStructObjectInspector (org.apache.hadoop.hive.serde2.lazy.objectinspector.LazySimpleStructObjectInspector)14 Mutation (org.apache.accumulo.core.data.Mutation)10 Test (org.junit.Test)10 Configuration (org.apache.hadoop.conf.Configuration)9 LazySerDeParameters (org.apache.hadoop.hive.serde2.lazy.LazySerDeParameters)9 Text (org.apache.hadoop.io.Text)9 Properties (java.util.Properties)8 LazyStruct (org.apache.hadoop.hive.serde2.lazy.LazyStruct)8 ObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector)8 TypeInfo (org.apache.hadoop.hive.serde2.typeinfo.TypeInfo)8 LazyMapObjectInspector (org.apache.hadoop.hive.serde2.lazy.objectinspector.LazyMapObjectInspector)7 Entry (java.util.Map.Entry)6 Connector (org.apache.accumulo.core.client.Connector)6 MockInstance (org.apache.accumulo.core.client.mock.MockInstance)6 PasswordToken (org.apache.accumulo.core.client.security.tokens.PasswordToken)6 Key (org.apache.accumulo.core.data.Key)6 Value (org.apache.accumulo.core.data.Value)6 Authorizations (org.apache.accumulo.core.security.Authorizations)6 JobConf (org.apache.hadoop.mapred.JobConf)6