Use of org.apache.hadoop.hive.ql.udf.generic.GenericUDFBucketNumber in project hive by apache.
From the class ReduceSinkOperator, method initializeOp: while building evaluators for the reduce key expressions, the operator checks each key for the GenericUDFBucketNumber pseudo-column (_bucket_number) and records its position in buckColIdxInKeyForSdpo.
@Override
protected void initializeOp(Configuration hconf) throws HiveException {
  super.initializeOp(hconf);
  try {
    numRows = 0;
    cntr = 1;
    logEveryNRows = HiveConf.getLongVar(hconf, HiveConf.ConfVars.HIVE_LOG_N_RECORDS);
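    // Fetch the reduce key expressions configured on this operator; they are
    // logged individually when debug logging is enabled.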
    List<ExprNodeDesc> keys = conf.getKeyCols();
    if (LOG.isDebugEnabled()) {
      LOG.debug("keys size is " + keys.size());
      for (ExprNodeDesc k : keys) {
        LOG.debug("Key exprNodeDesc " + k.getExprString());
      }
    }
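    // Build one evaluator per key expression; if a key is the GenericUDFBucketNumber
    // pseudo-column (_bucket_number), remember its index in buckColIdxInKeyForSdpo for
    // the sorted dynamic partition optimization.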
    keyEval = new ExprNodeEvaluator[keys.size()];
    int i = 0;
    for (ExprNodeDesc e : keys) {
      if (e instanceof ExprNodeGenericFuncDesc
          && ((ExprNodeGenericFuncDesc) e).getGenericUDF() instanceof GenericUDFBucketNumber) {
        buckColIdxInKeyForSdpo = i;
      }
      keyEval[i++] = ExprNodeEvaluatorFactory.get(e);
    }
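    // Record how many keys are distribution keys and which columns carry DISTINCT
    // expressions, then build evaluators for the value columns.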
    numDistributionKeys = conf.getNumDistributionKeys();
    distinctColIndices = conf.getDistinctColumnIndices();
    numDistinctExprs = distinctColIndices.size();
    valueEval = new ExprNodeEvaluator[conf.getValueCols().size()];
    i = 0;
    for (ExprNodeDesc e : conf.getValueCols()) {
      valueEval[i++] = ExprNodeEvaluatorFactory.get(e);
    }
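    // Partition-column evaluators; when a partition column is also a key,
    // reuse the existing key evaluator instead of creating a new one.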
    partitionEval = new ExprNodeEvaluator[conf.getPartitionCols().size()];
    i = 0;
    for (ExprNodeDesc e : conf.getPartitionCols()) {
      int index = ExprNodeDescUtils.indexOf(e, keys);
      partitionEval[i++] = index < 0 ? ExprNodeEvaluatorFactory.get(e) : keyEval[index];
    }
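    // Bucket-column evaluators, again sharing key evaluators where possible;
    // buckColIdxInKey places the bucket number right after the partition columns.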
    if (conf.getBucketCols() != null && !conf.getBucketCols().isEmpty()) {
      bucketEval = new ExprNodeEvaluator[conf.getBucketCols().size()];
      i = 0;
      for (ExprNodeDesc e : conf.getBucketCols()) {
        int index = ExprNodeDescUtils.indexOf(e, keys);
        bucketEval[i++] = index < 0 ? ExprNodeEvaluatorFactory.get(e) : keyEval[index];
      }
      buckColIdxInKey = conf.getPartitionCols().size();
    }
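    // Tag identifying this reduce sink (e.g. which input of a join a row came from);
    // skipTag controls whether the tag byte is emitted with the record.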
    tag = conf.getTag();
    tagByte[0] = (byte) tag;
    skipTag = conf.getSkipTag();
    LOG.info("Using tag = " + tag);
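    // Instantiate and initialize the SerDes used to serialize the shuffle key and value.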
    TableDesc keyTableDesc = conf.getKeySerializeInfo();
    AbstractSerDe keySerDe = keyTableDesc.getSerDeClass().newInstance();
    keySerDe.initialize(null, keyTableDesc.getProperties(), null);
    keySerializer = keySerDe;
    keyIsText = keySerializer.getSerializedClass().equals(Text.class);
    TableDesc valueTableDesc = conf.getValueSerializeInfo();
    AbstractSerDe valueSerDe = valueTableDesc.getSerDeClass().newInstance();
    valueSerDe.initialize(null, valueTableDesc.getProperties(), null);
    valueSerializer = valueSerDe;
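    // When a top-N limit is configured, set up the in-memory hash that lets the map side
    // drop rows that cannot make it into the top N; PTF reduce sinks use a PTF-aware variant.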
    int limit = conf.getTopN();
    float memUsage = conf.getTopNMemoryUsage();
    if (limit >= 0 && memUsage > 0) {
      reducerHash = conf.isPTFReduceSink() ? new PTFTopNHash() : new TopNHash();
      reducerHash.initialize(limit, memUsage, conf.isMapGroupBy(), this, conf, hconf);
    }
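    // The UNIFORM reducer trait indicates keys are hashed uniformly across reducers.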
    useUniformHash = conf.getReducerTraits().contains(UNIFORM);
    firstRow = true;
    // acidOp flag has to be checked to use JAVA hash which works like
    // identity function for integers, necessary to read RecordIdentifier
    // in case of ACID updates/deletes.
    boolean acidOp = conf.getWriteType() == AcidUtils.Operation.UPDATE
        || conf.getWriteType() == AcidUtils.Operation.DELETE;
    hashFunc = getConf().getBucketingVersion() == 2 && !acidOp
        ? ObjectInspectorUtils::getBucketHashCode
        : ObjectInspectorUtils::getBucketHashCodeOld;
  } catch (Exception e) {
    String msg = "Error initializing ReduceSinkOperator: " + e.getMessage();
    LOG.error(msg, e);
    throw new RuntimeException(e);
  }
}
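The GenericUDFBucketNumber check is the part of this snippet that the page title refers to. As a minimal standalone sketch of the same detection idiom (the class and method names here are hypothetical, not part of Hive):

import java.util.List;

import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFBucketNumber;

public final class BucketNumberKeyFinder {
  private BucketNumberKeyFinder() {
  }

  // Returns the index of the key expression that wraps GenericUDFBucketNumber
  // (the _bucket_number pseudo-column), or -1 if no key does.
  public static int indexOfBucketNumberKey(List<ExprNodeDesc> keys) {
    for (int i = 0; i < keys.size(); i++) {
      ExprNodeDesc e = keys.get(i);
      if (e instanceof ExprNodeGenericFuncDesc
          && ((ExprNodeGenericFuncDesc) e).getGenericUDF() instanceof GenericUDFBucketNumber) {
        return i;
      }
    }
    return -1;
  }
}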
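The final assignment picks the bucket hash implementation via method references: the bucketing-version-2 hash unless the write is an ACID update/delete, otherwise the legacy Java-style hash. A minimal sketch of that selection, assuming a BiFunction-typed hashFunc and hypothetical boolean flags standing in for the config checks:

import java.util.function.BiFunction;

import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
import org.apache.hadoop.io.IntWritable;

public class BucketHashSelectionSketch {
  public static void main(String[] args) {
    boolean bucketingVersion2 = true; // hypothetical flag standing in for getConf().getBucketingVersion() == 2
    boolean acidOp = false;           // hypothetical flag standing in for UPDATE/DELETE write types

    BiFunction<Object[], ObjectInspector[], Integer> hashFunc =
        bucketingVersion2 && !acidOp
            ? ObjectInspectorUtils::getBucketHashCode
            : ObjectInspectorUtils::getBucketHashCodeOld;

    // Hash a single int field using the writable int object inspector.
    Object[] fields = { new IntWritable(42) };
    ObjectInspector[] inspectors = { PrimitiveObjectInspectorFactory.writableIntObjectInspector };
    System.out.println("bucket hash = " + hashFunc.apply(fields, inspectors));
  }
}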