Search in sources :

Example 26 with ByteArrayRef

use of org.apache.hadoop.hive.serde2.lazy.ByteArrayRef in project hive by apache.

the class HBaseStructValue method toLazyObject.

/**
   * Creates and initializes a {@link LazyObject} for the given field over the given bytes.
   *
   * @param fieldID index into the struct's field refs; selects the object inspector that is used
   * @param bytes value the lazy object is initialized with, from offset 0 over its full length
   * @return the initialized {@link LazyObject}
   */
public LazyObject<? extends ObjectInspector> toLazyObject(int fieldID, byte[] bytes) {
    // wrap the raw value so it can be handed to the lazy object
    ByteArrayRef valueRef = new ByteArrayRef();
    valueRef.setData(bytes);
    // the lazy object is created from the object inspector of the requested field
    LazyObject<? extends ObjectInspector> lazy = LazyFactory.createLazyObject(
        oi.getAllStructFieldRefs().get(fieldID).getFieldObjectInspector());
    lazy.init(valueRef, 0, valueRef.getData().length);
    return lazy;
}
Also used : LazySimpleStructObjectInspector(org.apache.hadoop.hive.serde2.lazy.objectinspector.LazySimpleStructObjectInspector) ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) ByteArrayRef(org.apache.hadoop.hive.serde2.lazy.ByteArrayRef)

Example 27 with ByteArrayRef

use of org.apache.hadoop.hive.serde2.lazy.ByteArrayRef in project hive by apache.

the class HBaseTestStructSerializer method toLazyObject.

/**
   * Builds a {@link LazyObject} for one struct field and initializes it with the supplied bytes.
   *
   * @param fieldID position of the field in the struct's field refs
   * @param bytes backing value; the lazy object is initialized over the whole array
   *
   * @return the {@link LazyObject} after initialization
   */
@Override
public LazyObject<? extends ObjectInspector> toLazyObject(int fieldID, byte[] bytes) {
    // look up the inspector describing the requested field
    ObjectInspector inspector =
        oi.getAllStructFieldRefs().get(fieldID).getFieldObjectInspector();
    LazyObject<? extends ObjectInspector> created = LazyFactory.createLazyObject(inspector);

    // hand the bytes to the lazy object through a ByteArrayRef, covering offset 0..length
    ByteArrayRef holder = new ByteArrayRef();
    holder.setData(bytes);
    int length = holder.getData().length;
    created.init(holder, 0, length);
    return created;
}
Also used : LazySimpleStructObjectInspector(org.apache.hadoop.hive.serde2.lazy.objectinspector.LazySimpleStructObjectInspector) ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) ByteArrayRef(org.apache.hadoop.hive.serde2.lazy.ByteArrayRef)

Example 28 with ByteArrayRef

use of org.apache.hadoop.hive.serde2.lazy.ByteArrayRef in project hive by apache.

the class LazyHBaseCellMap method parse.

/**
 * Rebuilds {@code cachedMap} from the cells of the column family identified by
 * {@code columnFamilyBytes} and marks this object as parsed.
 *
 * Cells whose value is null or empty are skipped, as are cells whose qualifier does not
 * start with {@code qualPrefix} when a prefix is set.
 */
private void parse() {
    if (cachedMap != null) {
        cachedMap.clear();
    } else {
        cachedMap = new LinkedHashMap<Object, Object>();
    }
    NavigableMap<byte[], byte[]> cells = result.getFamilyMap(columnFamilyBytes);
    if (cells == null) {
        // nothing stored for this family: the map stays empty
        setParsed(true);
        return;
    }
    for (Entry<byte[], byte[]> cell : cells.entrySet()) {
        // null values and values of zero length are not added to the cachedMap;
        // neither are qualifiers that do not carry the requested prefix
        boolean emptyValue = cell.getValue() == null || cell.getValue().length == 0;
        if (emptyValue || (qualPrefix != null && !Bytes.startsWith(cell.getKey(), qualPrefix))) {
            continue;
        }
        LazyMapObjectInspector mapInspector = getInspector();

        // Key: always primitive; binaryStorage.get(0) is passed through for the key
        PrimitiveObjectInspector keyInspector =
            (PrimitiveObjectInspector) mapInspector.getMapKeyObjectInspector();
        LazyPrimitive<? extends ObjectInspector, ? extends Writable> lazyKey =
            LazyFactory.createLazyPrimitiveClass(keyInspector, binaryStorage.get(0));
        ByteArrayRef keyHolder = new ByteArrayRef();
        // when the prefix is hidden, only the part of the qualifier after it becomes the map key
        keyHolder.setData((qualPrefix != null && hideQualPrefix)
            ? Bytes.tail(cell.getKey(), cell.getKey().length - qualPrefix.length)
            : cell.getKey());
        lazyKey.init(keyHolder, 0, keyHolder.getData().length);

        // Value: binaryStorage.get(1) is passed through for the value
        LazyObject<?> lazyValue = LazyFactory.createLazyObject(
            mapInspector.getMapValueObjectInspector(), binaryStorage.get(1));
        byte[] valueBytes = cell.getValue();
        if (!isNull(oi.getNullSequence(), valueBytes, 0, valueBytes.length)) {
            ByteArrayRef valueHolder = new ByteArrayRef();
            valueHolder.setData(valueBytes);
            lazyValue.init(valueHolder, 0, valueHolder.getData().length);
        } else {
            lazyValue.setNull();
        }

        // Put the key/value into the map
        cachedMap.put(lazyKey.getObject(), lazyValue.getObject());
    }
    setParsed(true);
}
Also used : ByteArrayRef(org.apache.hadoop.hive.serde2.lazy.ByteArrayRef) LazyObject(org.apache.hadoop.hive.serde2.lazy.LazyObject) LazyMapObjectInspector(org.apache.hadoop.hive.serde2.lazy.objectinspector.LazyMapObjectInspector)

Example 29 with ByteArrayRef

use of org.apache.hadoop.hive.serde2.lazy.ByteArrayRef in project hive by apache.

the class LazyHBaseRow method uncheckedGetField.

/**
   * Returns the value of a field without checking whether the row needs parsing first.
   * Per the original documentation, both getField and getFieldsAsList call this.
   *
   * On first access the field is marked as initialized and populated according to its
   * column mapping: a column-family map, the row timestamp, the row key, or a single
   * family/qualifier cell.
   *
   * @param fieldID  The id of the field starting from 0.
   * @return  The value of the field
   */
private Object uncheckedGetField(int fieldID) {
    LazyObjectBase[] fields = getFields();
    boolean[] fieldsInited = getFieldInited();
    if (fieldsInited[fieldID]) {
        // already populated on an earlier call
        return fields[fieldID].getObject();
    }
    fieldsInited[fieldID] = true;
    ColumnMapping mapping = columnsMapping[fieldID];

    boolean wholeFamily =
        !(mapping.hbaseRowKey || mapping.hbaseTimestamp) && mapping.qualifierName == null;
    if (wholeFamily) {
        // it is a column family
        // primitive type for Map<Key, Value> can be stored in binary format. Pass in the
        // qualifier prefix to cherry pick the qualifiers that match the prefix instead of picking
        // up everything
        LazyHBaseCellMap cellMap = (LazyHBaseCellMap) fields[fieldID];
        cellMap.init(result, mapping.familyNameBytes, mapping.binaryStorage, mapping.qualifierPrefixBytes, mapping.isDoPrefixCut());
        return fields[fieldID].getObject();
    }

    if (mapping.hbaseTimestamp) {
        // Get the latest timestamp of all the cells as the row timestamp
        // from hbase-0.96.0
        long latest = result.rawCells()[0].getTimestamp();
        for (int cellIdx = 1; cellIdx < result.rawCells().length; cellIdx++) {
            latest = Math.max(latest, result.rawCells()[cellIdx].getTimestamp());
        }
        LazyObjectBase tsField = fields[fieldID];
        // the field is either a LazyTimestamp or, otherwise, treated as a LazyLong
        if (tsField instanceof LazyTimestamp) {
            ((LazyTimestamp) tsField).getWritableObject().setTime(latest);
        } else {
            ((LazyLong) tsField).getWritableObject().set(latest);
        }
        return tsField.getObject();
    }

    // row key, or a column i.e. a column-family with column-qualifier
    byte[] bytes = mapping.hbaseRowKey
        ? result.getRow()
        : result.getValue(mapping.familyNameBytes, mapping.qualifierNameBytes);
    if (bytes != null && !isNull(oi.getNullSequence(), bytes, 0, bytes.length)) {
        ByteArrayRef holder = new ByteArrayRef();
        holder.setData(bytes);
        fields[fieldID].init(holder, 0, bytes.length);
    } else {
        // missing value, or value equal to the configured null sequence
        fields[fieldID].setNull();
    }
    return fields[fieldID].getObject();
}
Also used : ByteArrayRef(org.apache.hadoop.hive.serde2.lazy.ByteArrayRef) LazyTimestamp(org.apache.hadoop.hive.serde2.lazy.LazyTimestamp) LazyObjectBase(org.apache.hadoop.hive.serde2.lazy.LazyObjectBase) ColumnMapping(org.apache.hadoop.hive.hbase.ColumnMappings.ColumnMapping)

Example 30 with ByteArrayRef

use of org.apache.hadoop.hive.serde2.lazy.ByteArrayRef in project hive by apache.

the class LazyAccumuloMap method parse.

/**
 * Rebuilds {@code cachedMap} from the tuples of {@code sourceRow} and marks this object as
 * parsed.
 *
 * Only tuples whose column family equals the mapped family and whose column qualifier starts
 * with the mapped qualifier prefix are kept. The prefix is removed from the qualifier before
 * it is used as the map key. The lazy key and lazy value objects themselves are stored.
 */
protected void parse() {
    if (this.cachedMap != null) {
        this.cachedMap.clear();
    } else {
        this.cachedMap = new LinkedHashMap<Object, Object>();
    }
    LazyMapObjectInspector mapInspector = getInspector();
    Text family = new Text(columnMapping.getColumnFamily());
    for (ColumnTuple entry : sourceRow.getTuples()) {
        String qualifier = entry.getCq().toString();
        boolean wanted = family.equals(entry.getCf())
            && qualifier.startsWith(columnMapping.getColumnQualifierPrefix());
        if (!wanted) {
            // A column family or qualifier we don't want to include in the map
            continue;
        }
        // Because we append the cq prefix when serializing the column
        // we should also remove it when pulling it from Accumulo
        qualifier = qualifier.substring(columnMapping.getColumnQualifierPrefix().length());

        // Keys are always primitive, respect the binary
        boolean binaryKey = ColumnEncoding.BINARY == columnMapping.getKeyEncoding();
        LazyPrimitive<? extends ObjectInspector, ? extends Writable> lazyKey =
            LazyFactory.createLazyPrimitiveClass(
                (PrimitiveObjectInspector) mapInspector.getMapKeyObjectInspector(), binaryKey);
        ByteArrayRef keyHolder = new ByteArrayRef();
        keyHolder.setData(qualifier.getBytes(Charsets.UTF_8));
        lazyKey.init(keyHolder, 0, keyHolder.getData().length);

        // Value can be anything, use the obj inspector and respect binary
        boolean binaryValue = ColumnEncoding.BINARY == columnMapping.getValueEncoding();
        LazyObject<?> lazyValue =
            LazyFactory.createLazyObject(mapInspector.getMapValueObjectInspector(), binaryValue);
        byte[] valueBytes = entry.getValue();
        if (valueBytes != null && !isNull(oi.getNullSequence(), valueBytes, 0, valueBytes.length)) {
            ByteArrayRef valueHolder = new ByteArrayRef();
            valueHolder.setData(valueBytes);
            lazyValue.init(valueHolder, 0, valueHolder.getData().length);
        } else {
            lazyValue.setNull();
        }
        cachedMap.put(lazyKey, lazyValue);
    }
    this.setParsed(true);
}
Also used : ByteArrayRef(org.apache.hadoop.hive.serde2.lazy.ByteArrayRef) LazyObject(org.apache.hadoop.hive.serde2.lazy.LazyObject) LazyMapObjectInspector(org.apache.hadoop.hive.serde2.lazy.objectinspector.LazyMapObjectInspector) Text(org.apache.hadoop.io.Text) ColumnTuple(org.apache.hadoop.hive.accumulo.AccumuloHiveRow.ColumnTuple)

Aggregations

ByteArrayRef (org.apache.hadoop.hive.serde2.lazy.ByteArrayRef)27 LazySimpleStructObjectInspector (org.apache.hadoop.hive.serde2.lazy.objectinspector.LazySimpleStructObjectInspector)12 Text (org.apache.hadoop.io.Text)11 Test (org.junit.Test)11 Mutation (org.apache.accumulo.core.data.Mutation)10 LazyStruct (org.apache.hadoop.hive.serde2.lazy.LazyStruct)9 DataInputStream (java.io.DataInputStream)8 Properties (java.util.Properties)8 Configuration (org.apache.hadoop.conf.Configuration)8 ByteStream (org.apache.hadoop.hive.serde2.ByteStream)8 LazySerDeParameters (org.apache.hadoop.hive.serde2.lazy.LazySerDeParameters)8 TypeInfo (org.apache.hadoop.hive.serde2.typeinfo.TypeInfo)8 IOException (java.io.IOException)7 Connector (org.apache.accumulo.core.client.Connector)6 MockInstance (org.apache.accumulo.core.client.mock.MockInstance)6 PasswordToken (org.apache.accumulo.core.client.security.tokens.PasswordToken)6 Key (org.apache.accumulo.core.data.Key)6 Value (org.apache.accumulo.core.data.Value)6 Authorizations (org.apache.accumulo.core.security.Authorizations)6 ObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector)6