Use of org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression in project hive by apache.
The class VectorMapJoinInnerGenerateResultOperator, method finishInnerRepeated.
protected void finishInnerRepeated(VectorizedRowBatch batch, JoinUtil.JoinResult joinResult,
    VectorMapJoinHashTableResult hashMapResult) throws HiveException, IOException {
  switch (joinResult) {
  case MATCH:
    if (bigTableValueExpressions != null) {
      // Run our value expressions over the whole batch.
      for (VectorExpression ve : bigTableValueExpressions) {
        ve.evaluate(batch);
      }
    }
    // Generate the special repeated case.
    generateHashMapResultRepeatedAll(batch, hashMapResults[0]);
    break;
  case SPILL:
    // The whole batch is spilled.
    spillBatchRepeated(batch, (VectorMapJoinHashTableResult) hashMapResults[0]);
    batch.size = 0;
    break;
  case NOMATCH:
    // No match for the entire batch.
    batch.size = 0;
    break;
  }
}
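This method handles the fast path where the big table's join key column is "repeating" (every row in the batch carries the same key), so a single hash-table lookup decides the fate of the whole batch. The standalone sketch below uses invented stand-in types, not Hive's actual classes, to illustrate that dispatch.

// Invented sketch (not Hive's API): when every row in a batch shares one join
// key, a single lookup result drives the entire batch.
enum JoinResult { MATCH, SPILL, NOMATCH }

final class RepeatedBatchSketch {
  int size;  // number of rows currently in the batch

  void finishRepeated(JoinResult result) {
    switch (result) {
    case MATCH:
      // Keep all rows; the real operator generates joined output here.
      break;
    case SPILL:
      // The real operator writes the batch to a spill partition for a later pass.
      size = 0;
      break;
    case NOMATCH:
      // No key match: discard the entire batch by zeroing its size.
      size = 0;
      break;
    }
  }
}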
Use of org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression in project hive by apache.
The class VectorReduceSinkCommonOperator, method process.
@Override
public void process(Object row, int tag) throws HiveException {
  try {
    VectorizedRowBatch batch = (VectorizedRowBatch) row;
    batchCounter++;
    if (batch.size == 0) {
      if (LOG.isDebugEnabled()) {
        LOG.debug(CLASS_NAME + " batch #" + batchCounter + " empty");
      }
      return;
    }
    // Perform any key expressions. Results will go into scratch columns.
    if (reduceSinkKeyExpressions != null) {
      for (VectorExpression ve : reduceSinkKeyExpressions) {
        ve.evaluate(batch);
      }
    }
    // Perform any value expressions. Results will go into scratch columns.
    if (reduceSinkValueExpressions != null) {
      for (VectorExpression ve : reduceSinkValueExpressions) {
        ve.evaluate(batch);
      }
    }
    serializedKeySeries.processBatch(batch);
    boolean selectedInUse = batch.selectedInUse;
    int[] selected = batch.selected;
    int keyLength;
    int logical;
    int end;
    int batchIndex;
    do {
      if (serializedKeySeries.getCurrentIsAllNull()) {
        // All-null key: use the pre-serialized null key bytes.
        if (tag == -1 || reduceSkipTag) {
          keyWritable.set(nullBytes, 0, nullBytes.length);
        } else {
          keyWritable.setSize(nullBytes.length + 1);
          System.arraycopy(nullBytes, 0, keyWritable.get(), 0, nullBytes.length);
          keyWritable.get()[nullBytes.length] = reduceTagByte;
        }
        keyWritable.setDistKeyLength(nullBytes.length);
        keyWritable.setHashCode(nullKeyHashCode);
      } else {
        // One serialized key for 1 or more rows with duplicate keys.
        keyLength = serializedKeySeries.getSerializedLength();
        if (tag == -1 || reduceSkipTag) {
          keyWritable.set(serializedKeySeries.getSerializedBytes(),
              serializedKeySeries.getSerializedStart(), keyLength);
        } else {
          keyWritable.setSize(keyLength + 1);
          System.arraycopy(serializedKeySeries.getSerializedBytes(),
              serializedKeySeries.getSerializedStart(), keyWritable.get(), 0, keyLength);
          keyWritable.get()[keyLength] = reduceTagByte;
        }
        keyWritable.setDistKeyLength(keyLength);
        keyWritable.setHashCode(serializedKeySeries.getCurrentHashCode());
      }
      logical = serializedKeySeries.getCurrentLogical();
      end = logical + serializedKeySeries.getCurrentDuplicateCount();
      do {
        // Serialize and collect one value row for each duplicate of the current key.
        batchIndex = (selectedInUse ? selected[logical] : logical);
        valueLazyBinarySerializeWrite.reset();
        valueVectorSerializeRow.serializeWrite(batch, batchIndex);
        valueBytesWritable.set(valueOutput.getData(), 0, valueOutput.getLength());
        collect(keyWritable, valueBytesWritable);
      } while (++logical < end);
      if (!serializedKeySeries.next()) {
        break;
      }
    } while (true);
  } catch (Exception e) {
    throw new HiveException(e);
  }
}
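The nested do/while structure depends on serializedKeySeries having grouped the batch (in processBatch) into runs of adjacent rows with equal serialized keys: the key bytes and hash code are produced once per run, and the inner loop emits one serialized value per duplicate row. A minimal plain-Java sketch of that run-length iteration, with an int array standing in for serialized key bytes (all names here are invented):

// Invented sketch of the run-length iteration above: a key series exposes runs
// of adjacent equal keys, so the key is built once per run while one value is
// emitted for every row in the run.
public class KeySeriesSketch {
  public static void main(String[] args) {
    int[] keys = { 7, 7, 7, 2, 2, 9 };  // rows already grouped by key
    int logical = 0;
    while (logical < keys.length) {
      int start = logical;
      // Advance to the end of the current duplicate run.
      do {
        logical++;
      } while (logical < keys.length && keys[logical] == keys[start]);
      // "Serialize" the key once per run, then emit one value per row.
      System.out.println("key=" + keys[start] + " duplicateCount=" + (logical - start));
    }
  }
}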
Use of org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression in project hive by apache.
The class Vectorizer, method canSpecializeReduceSink.
private boolean canSpecializeReduceSink(ReduceSinkDesc desc, boolean isTezOrSpark,
    VectorizationContext vContext, VectorReduceSinkInfo vectorReduceSinkInfo)
    throws HiveException {
  // Allocate a VectorReduceSinkDesc initially with key type NONE so EXPLAIN can report that
  // this operator was vectorized, but not native, along with the conditions checked below.
  VectorReduceSinkDesc vectorDesc = new VectorReduceSinkDesc();
  desc.setVectorDesc(vectorDesc);
  boolean isVectorizationReduceSinkNativeEnabled =
      HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVE_VECTORIZATION_REDUCESINK_NEW_ENABLED);
  String engine = HiveConf.getVar(hiveConf, HiveConf.ConfVars.HIVE_EXECUTION_ENGINE);
  boolean hasBuckets = desc.getBucketCols() != null && !desc.getBucketCols().isEmpty();
  boolean hasTopN = desc.getTopN() >= 0;
  boolean useUniformHash = desc.getReducerTraits().contains(UNIFORM);
  boolean hasDistinctColumns = desc.getDistinctColumnIndices().size() > 0;
  TableDesc keyTableDesc = desc.getKeySerializeInfo();
  Class<? extends Deserializer> keyDeserializerClass = keyTableDesc.getDeserializerClass();
  boolean isKeyBinarySortable =
      (keyDeserializerClass == org.apache.hadoop.hive.serde2.binarysortable.BinarySortableSerDe.class);
  TableDesc valueTableDesc = desc.getValueSerializeInfo();
  Class<? extends Deserializer> valueDeserializerClass = valueTableDesc.getDeserializerClass();
  boolean isValueLazyBinary =
      (valueDeserializerClass == org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe.class);
  // Remember the condition variables for EXPLAIN regardless.
  vectorDesc.setIsVectorizationReduceSinkNativeEnabled(isVectorizationReduceSinkNativeEnabled);
  vectorDesc.setEngine(engine);
  vectorDesc.setHasBuckets(hasBuckets);
  vectorDesc.setHasTopN(hasTopN);
  vectorDesc.setUseUniformHash(useUniformHash);
  vectorDesc.setHasDistinctColumns(hasDistinctColumns);
  vectorDesc.setIsKeyBinarySortable(isKeyBinarySortable);
  vectorDesc.setIsValueLazyBinary(isValueLazyBinary);
  // Many restrictions.
  if (!isVectorizationReduceSinkNativeEnabled || !isTezOrSpark || hasBuckets || hasTopN ||
      !useUniformHash || hasDistinctColumns || !isKeyBinarySortable || !isValueLazyBinary) {
    return false;
  }
  // We are doing work here we'd normally do in VectorReduceSinkCommonOperator's constructor.
  // So if we later decide not to specialize, we'll just waste any scratch columns allocated...
  List<ExprNodeDesc> keysDescs = desc.getKeyCols();
  VectorExpression[] allKeyExpressions = vContext.getVectorExpressions(keysDescs);
  // Since a key expression can be a calculation and the key will go into a scratch column,
  // we need the mapping and type information.
  int[] reduceSinkKeyColumnMap = new int[allKeyExpressions.length];
  TypeInfo[] reduceSinkKeyTypeInfos = new TypeInfo[allKeyExpressions.length];
  Type[] reduceSinkKeyColumnVectorTypes = new Type[allKeyExpressions.length];
  ArrayList<VectorExpression> reduceSinkKeyExpressionsList = new ArrayList<VectorExpression>();
  VectorExpression[] reduceSinkKeyExpressions;
  for (int i = 0; i < reduceSinkKeyColumnMap.length; i++) {
    VectorExpression ve = allKeyExpressions[i];
    reduceSinkKeyColumnMap[i] = ve.getOutputColumn();
    reduceSinkKeyTypeInfos[i] = keysDescs.get(i).getTypeInfo();
    reduceSinkKeyColumnVectorTypes[i] =
        VectorizationContext.getColumnVectorTypeFromTypeInfo(reduceSinkKeyTypeInfos[i]);
    if (!IdentityExpression.isColumnOnly(ve)) {
      reduceSinkKeyExpressionsList.add(ve);
    }
  }
  if (reduceSinkKeyExpressionsList.size() == 0) {
    reduceSinkKeyExpressions = null;
  } else {
    reduceSinkKeyExpressions = reduceSinkKeyExpressionsList.toArray(new VectorExpression[0]);
  }
  ArrayList<ExprNodeDesc> valueDescs = desc.getValueCols();
  VectorExpression[] allValueExpressions = vContext.getVectorExpressions(valueDescs);
  int[] reduceSinkValueColumnMap = new int[valueDescs.size()];
  TypeInfo[] reduceSinkValueTypeInfos = new TypeInfo[valueDescs.size()];
  Type[] reduceSinkValueColumnVectorTypes = new Type[valueDescs.size()];
  ArrayList<VectorExpression> reduceSinkValueExpressionsList = new ArrayList<VectorExpression>();
  VectorExpression[] reduceSinkValueExpressions;
  for (int i = 0; i < valueDescs.size(); ++i) {
    VectorExpression ve = allValueExpressions[i];
    reduceSinkValueColumnMap[i] = ve.getOutputColumn();
    reduceSinkValueTypeInfos[i] = valueDescs.get(i).getTypeInfo();
    reduceSinkValueColumnVectorTypes[i] =
        VectorizationContext.getColumnVectorTypeFromTypeInfo(reduceSinkValueTypeInfos[i]);
    if (!IdentityExpression.isColumnOnly(ve)) {
      reduceSinkValueExpressionsList.add(ve);
    }
  }
  if (reduceSinkValueExpressionsList.size() == 0) {
    reduceSinkValueExpressions = null;
  } else {
    reduceSinkValueExpressions = reduceSinkValueExpressionsList.toArray(new VectorExpression[0]);
  }
  vectorReduceSinkInfo.setReduceSinkKeyColumnMap(reduceSinkKeyColumnMap);
  vectorReduceSinkInfo.setReduceSinkKeyTypeInfos(reduceSinkKeyTypeInfos);
  vectorReduceSinkInfo.setReduceSinkKeyColumnVectorTypes(reduceSinkKeyColumnVectorTypes);
  vectorReduceSinkInfo.setReduceSinkKeyExpressions(reduceSinkKeyExpressions);
  vectorReduceSinkInfo.setReduceSinkValueColumnMap(reduceSinkValueColumnMap);
  vectorReduceSinkInfo.setReduceSinkValueTypeInfos(reduceSinkValueTypeInfos);
  vectorReduceSinkInfo.setReduceSinkValueColumnVectorTypes(reduceSinkValueColumnVectorTypes);
  vectorReduceSinkInfo.setReduceSinkValueExpressions(reduceSinkValueExpressions);
  return true;
}
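Both the key loop and the value loop above follow the same pattern: record an output-column mapping for every expression, but keep only the non-identity (computed) expressions for per-batch evaluation, since an identity expression just reads an existing column and needs no work at runtime. A compact sketch of that pattern with invented stand-in types (Expr and computedOnly are illustration-only names):

// Invented sketch: every expression contributes an output-column mapping, but
// only computed (non-identity) expressions are retained for evaluation.
import java.util.ArrayList;
import java.util.List;

interface Expr {
  int outputColumn();
  boolean isIdentity();
}

final class SpecializeSketch {
  static Expr[] computedOnly(Expr[] all, int[] columnMapOut) {
    List<Expr> computed = new ArrayList<>();
    for (int i = 0; i < all.length; i++) {
      columnMapOut[i] = all[i].outputColumn();  // always record the mapping
      if (!all[i].isIdentity()) {
        computed.add(all[i]);                   // only these run per batch
      }
    }
    // null signals "nothing to evaluate", matching the convention above.
    return computed.isEmpty() ? null : computed.toArray(new Expr[0]);
  }
}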
Use of org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression in project hive by apache.
The class Vectorizer, method vectorizeFilterOperator.
public static Operator<? extends OperatorDesc> vectorizeFilterOperator(
    Operator<? extends OperatorDesc> filterOp, VectorizationContext vContext)
    throws HiveException {
  FilterDesc filterDesc = (FilterDesc) filterOp.getConf();
  VectorFilterDesc vectorFilterDesc = new VectorFilterDesc();
  filterDesc.setVectorDesc(vectorFilterDesc);
  ExprNodeDesc predicateExpr = filterDesc.getPredicate();
  VectorExpression vectorPredicateExpr =
      vContext.getVectorExpression(predicateExpr, VectorExpressionDescriptor.Mode.FILTER);
  vectorFilterDesc.setPredicateExpression(vectorPredicateExpr);
  return OperatorFactory.getVectorOperator(filterOp.getCompilationOpContext(), filterDesc, vContext);
}
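One detail worth noting: the predicate is compiled with VectorExpressionDescriptor.Mode.FILTER rather than the default projection mode, so the resulting expression narrows the batch's selected-row list in place instead of writing a boolean output column. The invented sketch below mimics those selected-vector mechanics with plain fields (FilterModeSketch and filterPositive are illustration-only names):

// Invented sketch of filter-mode semantics: rewrite the selected array to
// point only at surviving rows, shrink size, and mark selectedInUse.
final class FilterModeSketch {
  int size = 4;
  int[] selected = { 0, 1, 2, 3 };
  boolean selectedInUse = false;
  long[] col = { 5, -1, 9, -3 };

  // Keep only rows where col > 0, mimicking a vectorized filter expression.
  void filterPositive() {
    int newSize = 0;
    for (int j = 0; j < size; j++) {
      int i = selectedInUse ? selected[j] : j;
      if (col[i] > 0) {
        selected[newSize++] = i;
      }
    }
    size = newSize;
    selectedInUse = true;
  }
}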
Use of org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression in project hive by apache.
The class Vectorizer, method vectorizeSelectOperator.
public static Operator<? extends OperatorDesc> vectorizeSelectOperator(
    Operator<? extends OperatorDesc> selectOp, VectorizationContext vContext)
    throws HiveException {
  SelectDesc selectDesc = (SelectDesc) selectOp.getConf();
  VectorSelectDesc vectorSelectDesc = new VectorSelectDesc();
  selectDesc.setVectorDesc(vectorSelectDesc);
  List<ExprNodeDesc> colList = selectDesc.getColList();
  int index = 0;
  final int size = colList.size();
  VectorExpression[] vectorSelectExprs = new VectorExpression[size];
  int[] projectedOutputColumns = new int[size];
  for (int i = 0; i < size; i++) {
    ExprNodeDesc expr = colList.get(i);
    VectorExpression ve = vContext.getVectorExpression(expr);
    projectedOutputColumns[i] = ve.getOutputColumn();
    if (ve instanceof IdentityExpression) {
      // Suppress useless evaluation.
      continue;
    }
    vectorSelectExprs[index++] = ve;
  }
  if (index < size) {
    vectorSelectExprs = Arrays.copyOf(vectorSelectExprs, index);
  }
  vectorSelectDesc.setSelectExpressions(vectorSelectExprs);
  vectorSelectDesc.setProjectedOutputColumns(projectedOutputColumns);
  return OperatorFactory.getVectorOperator(selectOp.getCompilationOpContext(), selectDesc, vContext);
}
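The select vectorizer records projectedOutputColumns for every projected column but evaluates only the non-identity expressions, trimming the expression array with Arrays.copyOf when identities were skipped. A small self-contained sketch of that fill-then-trim idiom (dropEmpty and CompactSketch are invented stand-ins):

// Invented sketch of the fill-then-trim idiom above: write survivors into a
// fixed-size array, count them with a separate index, then shrink the array
// with Arrays.copyOf only if anything was skipped.
import java.util.Arrays;

final class CompactSketch {
  static String[] dropEmpty(String[] in) {
    String[] out = new String[in.length];
    int index = 0;
    for (String s : in) {
      if (s.isEmpty()) {
        continue;  // analogous to skipping an IdentityExpression
      }
      out[index++] = s;
    }
    return (index < in.length) ? Arrays.copyOf(out, index) : out;
  }
}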