Examples with BinarySortableSerDe - org.apache.hadoop.hive.serde2.binarysortable.BinarySortableSerDe

Example 6 with BinarySortableSerDe

Use of org.apache.hadoop.hive.serde2.binarysortable.BinarySortableSerDe in the Apache Hive project.

From the class HBaseUtils, method deserializePartitionKey.

/**
 * Deserializes a partition key and returns <em>only</em> the partition values.
 *
 * <p>The key is decoded with a {@link BinarySortableSerDe} configured with the columns
 * {@code dbName} and {@code tableName} (both {@code string}) followed by one column per entry of
 * {@code partitions}. The two leading columns are dropped from the result.
 *
 * @param partitions partition columns; their names and types define the trailing key columns
 * @param key serialized partition key
 * @param conf configuration used to initialize the SerDe and to look up the default partition name
 * @return one string per partition column; a {@code null} deserialized value is replaced by the
 *     configured {@code DEFAULTPARTITIONNAME}
 * @throws RuntimeException wrapping the {@link SerDeException} raised while initializing the SerDe
 *     or deserializing the key
 */
private static List<String> deserializePartitionKey(List<FieldSchema> partitions, byte[] key, Configuration conf) {
    // Comma-separated column names and types handed to the SerDe. Local, single-threaded string
    // building, so the unsynchronized StringBuilder is sufficient.
    StringBuilder names = new StringBuilder("dbName,tableName,");
    StringBuilder types = new StringBuilder("string,string,");
    final int partitionCount = partitions.size();
    for (int i = 0; i < partitionCount; i++) {
        FieldSchema partition = partitions.get(i);
        if (i > 0) {
            names.append(',');
            types.append(',');
        }
        names.append(partition.getName());
        // Appends the TypeInfo's string form (String.valueOf semantics).
        types.append(TypeInfoUtils.getTypeInfoFromTypeString(partition.getType()));
    }
    BinarySortableSerDe serDe = new BinarySortableSerDe();
    Properties props = new Properties();
    props.setProperty(serdeConstants.LIST_COLUMNS, names.toString());
    props.setProperty(serdeConstants.LIST_COLUMN_TYPES, types.toString());
    try {
        serDe.initialize(conf, props);
        List<?> row = (List<?>) serDe.deserialize(new BytesWritable(key));
        // Skip the leading dbName and tableName columns.
        List<?> deserializedKeys = row.subList(2, partitionCount + 2);
        List<String> partitionKeys = new ArrayList<String>(deserializedKeys.size());
        for (int i = 0; i < deserializedKeys.size(); i++) {
            Object deserializedKey = deserializedKeys.get(i);
            if (deserializedKey == null) {
                partitionKeys.add(HiveConf.getVar(conf, HiveConf.ConfVars.DEFAULTPARTITIONNAME));
            } else {
                // Convert the deserialized value to a Java String using the column's declared type.
                TypeInfo inputType = TypeInfoUtils.getTypeInfoFromTypeString(partitions.get(i).getType());
                ObjectInspector inputOI = TypeInfoUtils.getStandardWritableObjectInspectorFromTypeInfo(inputType);
                Converter converter = ObjectInspectorConverters.getConverter(inputOI, PrimitiveObjectInspectorFactory.javaStringObjectInspector);
                partitionKeys.add((String) converter.convert(deserializedKey));
            }
        }
        return partitionKeys;
    } catch (SerDeException e) {
        throw new RuntimeException("Error when deserialize key", e);
    }
}

Also used : ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) BinarySortableSerDe(org.apache.hadoop.hive.serde2.binarysortable.BinarySortableSerDe) ArrayList(java.util.ArrayList) BytesWritable(org.apache.hadoop.io.BytesWritable) ByteString(com.google.protobuf.ByteString) Properties(java.util.Properties) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) Converter(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters.Converter) List(java.util.List) ArrayList(java.util.ArrayList) SerDeException(org.apache.hadoop.hive.serde2.SerDeException)

Example 7 with BinarySortableSerDe

Use of org.apache.hadoop.hive.serde2.binarysortable.BinarySortableSerDe in the Apache Hive project.

From the class HybridHashTableContainer, method setSerde.

/**
 * Captures the write helper and key/value serialization details from the given contexts.
 *
 * <p>Does nothing beyond reading the two SerDes when {@code writeHelper} is already set. Otherwise
 * the write helper is created, and each of {@code internalValueOi}, {@code sortableSortOrders},
 * {@code nullMarkers} and {@code notNullMarkers} is filled in only if it is still {@code null}.
 *
 * @param keyCtx context supplying the key SerDe
 * @param valCtx context supplying the value SerDe and the filter-tag flag
 * @throws SerDeException declared by the signature
 */
@Override
public void setSerde(MapJoinObjectSerDeContext keyCtx, MapJoinObjectSerDeContext valCtx) throws SerDeException {
    AbstractSerDe keySerde = keyCtx.getSerDe();
    AbstractSerDe valSerde = valCtx.getSerDe();
    if (writeHelper != null) {
        // Already initialized by an earlier call.
        return;
    }
    LOG.info("Initializing container with " + keySerde.getClass().getName() + " and " + valSerde.getClass().getName());
    // The cast below relies on the assumption that this hashtable is loaded only when tez is enabled.
    LazyBinaryStructObjectInspector valueInspector = (LazyBinaryStructObjectInspector) valSerde.getObjectInspector();
    writeHelper = new MapJoinBytesTableContainer.LazyBinaryKvWriter(keySerde, valueInspector, valCtx.hasFilterTag());
    if (internalValueOi == null) {
        internalValueOi = valueInspector;
    }
    // The key SerDe is cast only for fields that still need a value.
    if (sortableSortOrders == null) {
        sortableSortOrders = ((BinarySortableSerDe) keySerde).getSortOrders();
    }
    if (nullMarkers == null) {
        nullMarkers = ((BinarySortableSerDe) keySerde).getNullMarkers();
    }
    if (notNullMarkers == null) {
        notNullMarkers = ((BinarySortableSerDe) keySerde).getNotNullMarkers();
    }
}

Also used : AbstractSerDe(org.apache.hadoop.hive.serde2.AbstractSerDe) LazyBinaryStructObjectInspector(org.apache.hadoop.hive.serde2.lazybinary.objectinspector.LazyBinaryStructObjectInspector)

Example 8 with BinarySortableSerDe

Use of org.apache.hadoop.hive.serde2.binarysortable.BinarySortableSerDe in the Apache Hive project.

From the class MapJoinBytesTableContainer, method setSerde.

/**
 * Chooses and installs the key/value writer for this container on the first call.
 *
 * <p>When the key SerDe is a {@link BinarySortableSerDe} and the value SerDe is a
 * {@link LazyBinarySerDe}, a {@code LazyBinaryKvWriter} is used and the key's sort orders and
 * null / not-null markers are recorded. For any other combination a {@code KeyValueWriter} is used
 * and those three fields are set to {@code null}. Calls made after {@code writeHelper} is set only
 * read the two SerDes and change nothing.
 *
 * @param keyContext context supplying the key SerDe
 * @param valueContext context supplying the value SerDe and the filter-tag flag
 * @throws SerDeException declared by the signature
 */
@Override
public void setSerde(MapJoinObjectSerDeContext keyContext, MapJoinObjectSerDeContext valueContext) throws SerDeException {
    AbstractSerDe keySerde = keyContext.getSerDe();
    AbstractSerDe valSerde = valueContext.getSerDe();
    if (writeHelper != null) {
        // Already initialized by an earlier call.
        return;
    }
    LOG.info("Initializing container with " + keySerde.getClass().getName() + " and " + valSerde.getClass().getName());
    if (!(keySerde instanceof BinarySortableSerDe) || !(valSerde instanceof LazyBinarySerDe)) {
        // Generic path: no sortable-key metadata is available.
        writeHelper = new KeyValueWriter(keySerde, valSerde, valueContext.hasFilterTag());
        internalValueOi = createInternalOi(valueContext);
        sortableSortOrders = null;
        nullMarkers = null;
        notNullMarkers = null;
        return;
    }
    // BinarySortable key + LazyBinary value path.
    LazyBinaryStructObjectInspector valueInspector = (LazyBinaryStructObjectInspector) valSerde.getObjectInspector();
    writeHelper = new LazyBinaryKvWriter(keySerde, valueInspector, valueContext.hasFilterTag());
    internalValueOi = valueInspector;
    BinarySortableSerDe sortableKeySerde = (BinarySortableSerDe) keySerde;
    sortableSortOrders = sortableKeySerde.getSortOrders();
    nullMarkers = sortableKeySerde.getNullMarkers();
    notNullMarkers = sortableKeySerde.getNotNullMarkers();
}

Also used : BinarySortableSerDe(org.apache.hadoop.hive.serde2.binarysortable.BinarySortableSerDe) LazyBinarySerDe(org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe) AbstractSerDe(org.apache.hadoop.hive.serde2.AbstractSerDe) LazyBinaryStructObjectInspector(org.apache.hadoop.hive.serde2.lazybinary.objectinspector.LazyBinaryStructObjectInspector)

Example 9 with BinarySortableSerDe

Use of org.apache.hadoop.hive.serde2.binarysortable.BinarySortableSerDe in the Apache Hive project.

From the class SparkReduceRecordHandler, method init.

/**
 * Initializes this reduce-side record handler.
 *
 * <p>Reads the {@code ReduceWork} for the job, instantiates and initializes the key deserializer
 * and one value deserializer per tag, builds a row object inspector per tag (vectorized or
 * standard struct), wires the execution context, reporter and output collector into the reducer,
 * and finally initializes the reducer operator tree and any dummy parent operators of the local
 * work.
 *
 * @param job job configuration from which the {@code ReduceWork} is obtained
 * @param output collector installed on the reducer via {@code OperatorUtils.setChildrenCollector}
 * @param reporter passed through to {@code super.init}
 * @throws Exception declared by the signature; failures while setting up deserializers or
 *     initializing operators are rethrown as {@link RuntimeException}, except
 *     {@link OutOfMemoryError} during operator initialization, which is rethrown as-is
 */
@Override
@SuppressWarnings("unchecked")
public void init(JobConf job, OutputCollector output, Reporter reporter) throws Exception {
    perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.SPARK_INIT_OPERATORS);
    super.init(job, output, reporter);
    // Both inspector arrays are sized Byte.MAX_VALUE and indexed by tag below.
    rowObjectInspector = new ObjectInspector[Byte.MAX_VALUE];
    ObjectInspector[] valueObjectInspector = new ObjectInspector[Byte.MAX_VALUE];
    ObjectInspector keyObjectInspector;
    ReduceWork gWork = Utilities.getReduceWork(job);
    reducer = gWork.getReducer();
    vectorized = gWork.getVectorMode();
    // clear out any parents as reducer is the root
    reducer.setParentOperators(null);
    isTagged = gWork.getNeedsTagging();
    try {
        // Instantiate and initialize the key deserializer from the plan's key descriptor.
        keyTableDesc = gWork.getKeyDesc();
        inputKeyDeserializer = ReflectionUtils.newInstance(keyTableDesc.getDeserializerClass(), null);
        SerDeUtils.initializeSerDe(inputKeyDeserializer, null, keyTableDesc.getProperties(), null);
        keyObjectInspector = inputKeyDeserializer.getObjectInspector();
        valueTableDesc = new TableDesc[gWork.getTagToValueDesc().size()];
        if (vectorized) {
            final int maxTags = gWork.getTagToValueDesc().size();
            // CONSIDER: Cleaning up this code and eliminating the arrays.  Vectorization only handles
            // one operator tree.
            Preconditions.checkState(maxTags == 1);
            keyStructInspector = (StructObjectInspector) keyObjectInspector;
            // Value columns are placed after all key columns in the vectorized row.
            firstValueColumnOffset = keyStructInspector.getAllStructFieldRefs().size();
            buffer = new DataOutputBuffer();
        }
        // One value deserializer and one row object inspector per tag.
        for (int tag = 0; tag < gWork.getTagToValueDesc().size(); tag++) {
            // We should initialize the SerDe with the TypeInfo when available.
            valueTableDesc[tag] = gWork.getTagToValueDesc().get(tag);
            // NOTE(review): inputValueDeserializer is not allocated in this method — presumably a
            // pre-sized field declared elsewhere; confirm.
            inputValueDeserializer[tag] = ReflectionUtils.newInstance(valueTableDesc[tag].getDeserializerClass(), null);
            SerDeUtils.initializeSerDe(inputValueDeserializer[tag], null, valueTableDesc[tag].getProperties(), null);
            valueObjectInspector[tag] = inputValueDeserializer[tag].getObjectInspector();
            // Only populated on the non-vectorized path below.
            ArrayList<ObjectInspector> ois = new ArrayList<ObjectInspector>();
            if (vectorized) {
                /* vectorization only works with struct object inspectors */
                valueStructInspector = (StructObjectInspector) valueObjectInspector[tag];
                // NOTE(review): totalColumns is computed but not read anywhere in this method.
                final int totalColumns = firstValueColumnOffset + valueStructInspector.getAllStructFieldRefs().size();
                rowObjectInspector[tag] = Utilities.constructVectorizedReduceRowOI(keyStructInspector, valueStructInspector);
                batch = gWork.getVectorizedRowBatchCtx().createVectorizedRowBatch();
                // Setup vectorized deserialization for the key and value.
                // The key deserializer is cast to BinarySortableSerDe to obtain its sort orders and
                // null / not-null markers.
                BinarySortableSerDe binarySortableSerDe = (BinarySortableSerDe) inputKeyDeserializer;
                keyBinarySortableDeserializeToRow = new VectorDeserializeRow<BinarySortableDeserializeRead>(new BinarySortableDeserializeRead(VectorizedBatchUtil.typeInfosFromStructObjectInspector(keyStructInspector), /* useExternalBuffer */
                true, binarySortableSerDe.getSortOrders(), binarySortableSerDe.getNullMarkers(), binarySortableSerDe.getNotNullMarkers()));
                // Key columns start at column 0 of the batch.
                keyBinarySortableDeserializeToRow.init(0);
                final int valuesSize = valueStructInspector.getAllStructFieldRefs().size();
                // The value deserializer is only set up when there is at least one value column.
                if (valuesSize > 0) {
                    valueLazyBinaryDeserializeToRow = new VectorDeserializeRow<LazyBinaryDeserializeRead>(new LazyBinaryDeserializeRead(VectorizedBatchUtil.typeInfosFromStructObjectInspector(valueStructInspector), /* useExternalBuffer */
                    true));
                    valueLazyBinaryDeserializeToRow.init(firstValueColumnOffset);
                    // Create data buffers for value bytes column vectors.
                    for (int i = firstValueColumnOffset; i < batch.numCols; i++) {
                        ColumnVector colVector = batch.cols[i];
                        if (colVector instanceof BytesColumnVector) {
                            BytesColumnVector bytesColumnVector = (BytesColumnVector) colVector;
                            bytesColumnVector.initBuffer();
                        }
                    }
                }
            } else {
                // Non-vectorized: the row is a standard struct of (key, value) using
                // Utilities.reduceFieldNameList as field names.
                ois.add(keyObjectInspector);
                ois.add(valueObjectInspector[tag]);
                // reducer.setGroupKeyObjectInspector(keyObjectInspector);
                rowObjectInspector[tag] = ObjectInspectorFactory.getStandardStructObjectInspector(Utilities.reduceFieldNameList, ois);
            }
        }
    } catch (Exception e) {
        // Any setup failure is surfaced unchecked, with the original exception as the cause.
        throw new RuntimeException(e);
    }
    // NOTE(review): jc and rp are not assigned in this method — presumably set by super.init; confirm.
    ExecMapperContext execContext = new ExecMapperContext(job);
    localWork = gWork.getMapRedLocalWork();
    execContext.setJc(jc);
    execContext.setLocalWork(localWork);
    reducer.passExecContext(execContext);
    reducer.setReporter(rp);
    OperatorUtils.setChildrenCollector(Arrays.<Operator<? extends OperatorDesc>>asList(reducer), output);
    // initialize reduce operator tree
    try {
        LOG.info(reducer.dump(0));
        reducer.initialize(jc, rowObjectInspector);
        if (localWork != null) {
            // Dummy parent operators of the local work share the same execution context.
            for (Operator<? extends OperatorDesc> dummyOp : localWork.getDummyParentOp()) {
                dummyOp.setExecContext(execContext);
                dummyOp.initialize(jc, null);
            }
        }
    } catch (Throwable e) {
        // Record the failure before propagating it.
        abort = true;
        if (e instanceof OutOfMemoryError) {
            // Don't create a new object if we are already out of memory
            throw (OutOfMemoryError) e;
        } else {
            throw new RuntimeException("Reduce operator initialization failed", e);
        }
    }
    perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.SPARK_INIT_OPERATORS);
}

Also used : ExecMapperContext(org.apache.hadoop.hive.ql.exec.mr.ExecMapperContext) ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector) BinarySortableSerDe(org.apache.hadoop.hive.serde2.binarysortable.BinarySortableSerDe) ArrayList(java.util.ArrayList) BinarySortableDeserializeRead(org.apache.hadoop.hive.serde2.binarysortable.fast.BinarySortableDeserializeRead) ReduceWork(org.apache.hadoop.hive.ql.plan.ReduceWork) IOException(java.io.IOException) SerDeException(org.apache.hadoop.hive.serde2.SerDeException) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) BytesColumnVector(org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector) ColumnVector(org.apache.hadoop.hive.ql.exec.vector.ColumnVector) DataOutputBuffer(org.apache.hadoop.io.DataOutputBuffer) BytesColumnVector(org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector) LazyBinaryDeserializeRead(org.apache.hadoop.hive.serde2.lazybinary.fast.LazyBinaryDeserializeRead)

Example 10 with BinarySortableSerDe

Use of org.apache.hadoop.hive.serde2.binarysortable.BinarySortableSerDe in the Apache Hive project.

From the class MapJoinTestConfig, method createMapJoinTableContainerSerDe.

/**
 * Builds the {@link MapJoinTableContainerSerDe} for the small table at position 1 of the given
 * map-join descriptor.
 *
 * <p>The key side always uses a {@link BinarySortableSerDe} initialized with the key table
 * descriptor's properties. The value side uses the deserializer class named by the value table
 * descriptor, which is taken from the plain value descriptors when
 * {@code mapJoinDesc.getNoOuterJoin()} is true and from the filtered value descriptors otherwise.
 *
 * @param mapJoinDesc map-join descriptor supplying the key and value table descriptors
 * @return a container SerDe wrapping the key and value contexts
 * @throws SerDeException declared by the signature
 */
public static MapJoinTableContainerSerDe createMapJoinTableContainerSerDe(MapJoinDesc mapJoinDesc) throws SerDeException {
    final Byte smallTablePos = 1;

    // Key side. UNDONE: the key SerDe class is hard-coded to BinarySortableSerDe rather than
    // taken from the key table descriptor; it is unclear why this is required.
    TableDesc keyDesc = mapJoinDesc.getKeyTblDesc();
    AbstractSerDe keySerDe = (AbstractSerDe) ReflectionUtil.newInstance(BinarySortableSerDe.class, null);
    SerDeUtils.initializeSerDe(keySerDe, null, keyDesc.getProperties(), null);
    MapJoinObjectSerDeContext keyContext = new MapJoinObjectSerDeContext(keySerDe, false);

    // Value side: pick the descriptor list that matches the join type.
    TableDesc valueDesc = mapJoinDesc.getNoOuterJoin()
        ? mapJoinDesc.getValueTblDescs().get(smallTablePos)
        : mapJoinDesc.getValueFilteredTblDescs().get(smallTablePos);
    AbstractSerDe valueSerDe = (AbstractSerDe) ReflectionUtil.newInstance(valueDesc.getDeserializerClass(), null);
    SerDeUtils.initializeSerDe(valueSerDe, null, valueDesc.getProperties(), null);
    MapJoinObjectSerDeContext valueContext = new MapJoinObjectSerDeContext(valueSerDe, hasFilter(mapJoinDesc, smallTablePos));

    return new MapJoinTableContainerSerDe(keyContext, valueContext);
}

Also used : MapJoinTableContainerSerDe(org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainerSerDe) BinarySortableSerDe(org.apache.hadoop.hive.serde2.binarysortable.BinarySortableSerDe) MapJoinObjectSerDeContext(org.apache.hadoop.hive.ql.exec.persistence.MapJoinObjectSerDeContext) TableDesc(org.apache.hadoop.hive.ql.plan.TableDesc) AbstractSerDe(org.apache.hadoop.hive.serde2.AbstractSerDe)

Aggregations

BinarySortableSerDe (org.apache.hadoop.hive.serde2.binarysortable.BinarySortableSerDe) 8, ArrayList (java.util.ArrayList) 6, ObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) 5, SerDeException (org.apache.hadoop.hive.serde2.SerDeException) 4, List (java.util.List) 3, BytesColumnVector (org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector) 3, ColumnVector (org.apache.hadoop.hive.ql.exec.vector.ColumnVector) 3, AbstractSerDe (org.apache.hadoop.hive.serde2.AbstractSerDe) 3, BinarySortableDeserializeRead (org.apache.hadoop.hive.serde2.binarysortable.fast.BinarySortableDeserializeRead) 3, LazyBinaryDeserializeRead (org.apache.hadoop.hive.serde2.lazybinary.fast.LazyBinaryDeserializeRead) 3, StructObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector) 3, BytesWritable (org.apache.hadoop.io.BytesWritable) 3, ByteString (com.google.protobuf.ByteString) 2, Properties (java.util.Properties) 2, LazyBinaryStructObjectInspector (org.apache.hadoop.hive.serde2.lazybinary.objectinspector.LazyBinaryStructObjectInspector) 2, Converter (org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters.Converter) 2, TypeInfo (org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) 2, KeyValueReader (org.apache.tez.runtime.library.api.KeyValueReader) 2, KeyValuesReader (org.apache.tez.runtime.library.api.KeyValuesReader) 2, IOException (java.io.IOException) 1