Search in sources :

Example 1 with TopNHash

Use of org.apache.hadoop.hive.ql.exec.TopNHash in the Apache Hive project.

From the class VectorReduceSinkCommonOperator, method initializeOp:

@Override
protected void initializeOp(Configuration hconf) throws HiveException {
    super.initializeOp(hconf);

    // Finish transient (runtime-only) setup of the vectorized key/value expressions.
    VectorExpression.doTransientInit(reduceSinkKeyExpressions);
    VectorExpression.doTransientInit(reduceSinkValueExpressions);

    if (LOG.isDebugEnabled()) {
        // Determine the name of our map or reduce task for debug tracing.
        BaseWork work = Utilities.getMapWork(hconf);
        if (work == null) {
            work = Utilities.getReduceWork(hconf);
        }
        // Guard against neither map nor reduce work being present in the
        // configuration; the original code would NPE here in that case.
        taskName = (work == null) ? "" : work.getName();
    }
    // NOTE(review): a local 'context' derived from Operator.CONTEXT_NAME_KEY was
    // computed here but never read in this method; removed as dead code.

    reduceSkipTag = conf.getSkipTag();
    reduceTagByte = (byte) conf.getTag();
    if (LOG.isInfoEnabled()) {
        LOG.info("Using tag = " + (int) reduceTagByte);
    }

    if (!isEmptyKey) {
        // Keys are written with binary-sortable serialization so byte-wise shuffle
        // ordering honors the declared per-column sort order and null markers.
        TableDesc keyTableDesc = conf.getKeySerializeInfo();
        boolean[] columnSortOrder =
            getColumnSortOrder(keyTableDesc.getProperties(), reduceSinkKeyColumnMap.length);
        byte[] columnNullMarker =
            getColumnNullMarker(keyTableDesc.getProperties(), reduceSinkKeyColumnMap.length, columnSortOrder);
        byte[] columnNotNullMarker =
            getColumnNotNullMarker(keyTableDesc.getProperties(), reduceSinkKeyColumnMap.length, columnSortOrder);
        keyBinarySortableSerializeWrite =
            new BinarySortableSerializeWrite(columnSortOrder, columnNullMarker, columnNotNullMarker);
    }

    if (!isEmptyValue) {
        // Values use the LazyBinary fast serialization path into a reusable Output buffer.
        valueLazyBinarySerializeWrite = new LazyBinarySerializeWrite(reduceSinkValueColumnMap.length);
        valueVectorSerializeRow = new VectorSerializeRow<LazyBinarySerializeWrite>(valueLazyBinarySerializeWrite);
        valueVectorSerializeRow.init(reduceSinkValueTypeInfos, reduceSinkValueColumnMap);
        valueOutput = new Output();
        valueVectorSerializeRow.setOutput(valueOutput);
    }

    keyWritable = new HiveKey();
    valueBytesWritable = new BytesWritable();

    // Optional Top-N (LIMIT push-down) hash: engaged only when a non-negative
    // limit AND a positive memory budget are both configured.
    int limit = conf.getTopN();
    float memUsage = conf.getTopNMemoryUsage();
    if (limit >= 0 && memUsage > 0) {
        reducerHash = new TopNHash();
        reducerHash.initialize(limit, memUsage, conf.isMapGroupBy(), this, conf, hconf);
    }

    batchCounter = 0;
}
Also used : TopNHash(org.apache.hadoop.hive.ql.exec.TopNHash) LazyBinarySerializeWrite(org.apache.hadoop.hive.serde2.lazybinary.fast.LazyBinarySerializeWrite) BytesWritable(org.apache.hadoop.io.BytesWritable) BinarySortableSerializeWrite(org.apache.hadoop.hive.serde2.binarysortable.fast.BinarySortableSerializeWrite) HiveKey(org.apache.hadoop.hive.ql.io.HiveKey) Output(org.apache.hadoop.hive.serde2.ByteStream.Output) TableDesc(org.apache.hadoop.hive.ql.plan.TableDesc) BaseWork(org.apache.hadoop.hive.ql.plan.BaseWork)

Aggregations

TopNHash (org.apache.hadoop.hive.ql.exec.TopNHash)1 HiveKey (org.apache.hadoop.hive.ql.io.HiveKey)1 BaseWork (org.apache.hadoop.hive.ql.plan.BaseWork)1 TableDesc (org.apache.hadoop.hive.ql.plan.TableDesc)1 Output (org.apache.hadoop.hive.serde2.ByteStream.Output)1 BinarySortableSerializeWrite (org.apache.hadoop.hive.serde2.binarysortable.fast.BinarySortableSerializeWrite)1 LazyBinarySerializeWrite (org.apache.hadoop.hive.serde2.lazybinary.fast.LazyBinarySerializeWrite)1 BytesWritable (org.apache.hadoop.io.BytesWritable)1