use of org.apache.hadoop.hive.ql.exec.vector.VectorizationContext in project hive by apache.
the class TestVectorIfStatement method doVectorIfTest.
private void doVectorIfTest(TypeInfo typeInfo, IfVariation ifVariation, List<String> columns, String[] columnNames, TypeInfo[] typeInfos, DataTypePhysicalVariation[] dataTypePhysicalVariations, List<ExprNodeDesc> children, IfStmtTestMode ifStmtTestMode, ColumnScalarMode columnScalarMode, VectorRandomBatchSource batchSource, Object[] resultObjects) throws Exception {
final boolean isFilter = ifVariation.isFilter;
GenericUDF udf;
switch(ifStmtTestMode) {
case VECTOR_EXPRESSION:
udf = new GenericUDFIf();
break;
case ADAPTOR_WHEN:
udf = new GenericUDFWhen();
break;
default:
throw new RuntimeException("Unexpected IF statement test mode " + ifStmtTestMode);
}
ExprNodeGenericFuncDesc exprDesc = new ExprNodeGenericFuncDesc(typeInfo, udf, children);
String ifExprMode = (ifStmtTestMode != IfStmtTestMode.VECTOR_EXPRESSION ? "adaptor" : "good");
HiveConf hiveConf = new HiveConf();
hiveConf.setVar(HiveConf.ConfVars.HIVE_VECTORIZED_IF_EXPR_MODE, ifExprMode);
VectorizationContext vectorizationContext = new VectorizationContext("name", columns, Arrays.asList(typeInfos), Arrays.asList(dataTypePhysicalVariations), hiveConf);
VectorExpression vectorExpression = vectorizationContext.getVectorExpression(exprDesc, (isFilter ? VectorExpressionDescriptor.Mode.FILTER : VectorExpressionDescriptor.Mode.PROJECTION));
final TypeInfo outputTypeInfo;
final ObjectInspector objectInspector;
if (!isFilter) {
outputTypeInfo = vectorExpression.getOutputTypeInfo();
objectInspector = TypeInfoUtils.getStandardWritableObjectInspectorFromTypeInfo(outputTypeInfo);
} else {
outputTypeInfo = null;
objectInspector = null;
}
if (ifStmtTestMode == IfStmtTestMode.VECTOR_EXPRESSION && vectorExpression instanceof VectorUDFAdaptor) {
System.out.println("*NO NATIVE VECTOR EXPRESSION* typeInfo " + typeInfo.toString() + " ifStmtTestMode " + ifStmtTestMode + " ifVariation " + ifVariation + " columnScalarMode " + columnScalarMode + " vectorExpression " + vectorExpression.toString());
}
String[] outputScratchTypeNames = vectorizationContext.getScratchColumnTypeNames();
DataTypePhysicalVariation[] outputDataTypePhysicalVariations = vectorizationContext.getScratchDataTypePhysicalVariations();
VectorizedRowBatchCtx batchContext = new VectorizedRowBatchCtx(columnNames, typeInfos, dataTypePhysicalVariations, /* dataColumnNums */ null, /* partitionColumnCount */ 0, /* virtualColumnCount */ 0, /* neededVirtualColumns */ null, outputScratchTypeNames, outputDataTypePhysicalVariations);
VectorizedRowBatch batch = batchContext.createVectorizedRowBatch();
// System.out.println("*VECTOR EXPRESSION* " + vectorExpression.getClass().getSimpleName());
/*
System.out.println(
"*DEBUG* typeInfo " + typeInfo.toString() +
" ifStmtTestMode " + ifStmtTestMode +
" ifVariation " + ifVariation +
" columnScalarMode " + columnScalarMode +
" vectorExpression " + vectorExpression.toString());
*/
VectorExtractRow resultVectorExtractRow = null;
Object[] scratchRow = null;
if (!isFilter) {
resultVectorExtractRow = new VectorExtractRow();
final int outputColumnNum = vectorExpression.getOutputColumnNum();
resultVectorExtractRow.init(new TypeInfo[] { outputTypeInfo }, new int[] { outputColumnNum });
scratchRow = new Object[1];
}
boolean copySelectedInUse = false;
int[] copySelected = new int[VectorizedRowBatch.DEFAULT_SIZE];
batchSource.resetBatchIteration();
int rowIndex = 0;
while (true) {
if (!batchSource.fillNextBatch(batch)) {
break;
}
final int originalBatchSize = batch.size;
if (isFilter) {
copySelectedInUse = batch.selectedInUse;
if (batch.selectedInUse) {
System.arraycopy(batch.selected, 0, copySelected, 0, originalBatchSize);
}
}
// In filter mode, the batch size can be made smaller.
vectorExpression.evaluate(batch);
if (!isFilter) {
extractResultObjects(batch, rowIndex, resultVectorExtractRow, scratchRow, objectInspector, resultObjects);
} else {
final int currentBatchSize = batch.size;
if (copySelectedInUse && batch.selectedInUse) {
int selectIndex = 0;
for (int i = 0; i < originalBatchSize; i++) {
final int originalBatchIndex = copySelected[i];
final boolean booleanResult;
if (selectIndex < currentBatchSize && batch.selected[selectIndex] == originalBatchIndex) {
booleanResult = true;
selectIndex++;
} else {
booleanResult = false;
}
resultObjects[rowIndex + i] = new BooleanWritable(booleanResult);
}
} else if (batch.selectedInUse) {
int selectIndex = 0;
for (int i = 0; i < originalBatchSize; i++) {
final boolean booleanResult;
if (selectIndex < currentBatchSize && batch.selected[selectIndex] == i) {
booleanResult = true;
selectIndex++;
} else {
booleanResult = false;
}
resultObjects[rowIndex + i] = new BooleanWritable(booleanResult);
}
} else if (currentBatchSize == 0) {
// Whole batch got zapped.
for (int i = 0; i < originalBatchSize; i++) {
resultObjects[rowIndex + i] = new BooleanWritable(false);
}
} else {
// Every row kept.
for (int i = 0; i < originalBatchSize; i++) {
resultObjects[rowIndex + i] = new BooleanWritable(true);
}
}
}
rowIndex += originalBatchSize;
}
}
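The pattern this test exercises can be reduced to a few lines. A minimal sketch, assuming the same Hive utilities as above; the column name, type, and context name are illustrative, and the batch is assumed to come from a VectorizedRowBatchCtx as in the test:
// Minimal sketch: one LONG input column, compiled to a projection expression.
List<String> columns = Arrays.asList("col0");
TypeInfo[] typeInfos = new TypeInfo[] { TypeInfoFactory.longTypeInfo };
DataTypePhysicalVariation[] variations =
    new DataTypePhysicalVariation[] { DataTypePhysicalVariation.NONE };
VectorizationContext vContext = new VectorizationContext("sketch", columns,
    Arrays.asList(typeInfos), Arrays.asList(variations), new HiveConf());
// Any ExprNodeDesc works; a bare column reference is the simplest input.
ExprNodeDesc colExpr =
    new ExprNodeColumnDesc(TypeInfoFactory.longTypeInfo, "col0", "table", false);
VectorExpression vecExpr =
    vContext.getVectorExpression(colExpr, VectorExpressionDescriptor.Mode.PROJECTION);
vecExpr.evaluate(batch); // batch obtained elsewhere, e.g. batchContext.createVectorizedRowBatch()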
use of org.apache.hadoop.hive.ql.exec.vector.VectorizationContext in project hive by apache.
the class FakeCaptureVectorToRowOutputOperator method initializeOp.
@Override
public void initializeOp(Configuration conf) throws HiveException {
super.initializeOp(conf);
VectorizationContextRegion vectorizationContextRegion = (VectorizationContextRegion) op;
VectorizationContext outputVectorizationContext = vectorizationContextRegion.getOutputVectorizationContext();
outputTypeInfos = outputVectorizationContext.getInitialTypeInfos();
final int outputLength = outputTypeInfos.length;
outputObjectInspectors = new ObjectInspector[outputLength];
for (int i = 0; i < outputLength; i++) {
TypeInfo typeInfo = outputTypeInfos[i];
outputObjectInspectors[i] = TypeInfoUtils.getStandardWritableObjectInspectorFromTypeInfo(typeInfo);
}
vectorExtractRow = new VectorExtractRow();
vectorExtractRow.init(outputTypeInfos);
}
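Once initialized this way, the operator can turn each batch row back into writable objects. A hedged sketch of the per-row extraction loop (the loop itself is illustrative; extractRow is the VectorExtractRow API used throughout these tests):
// Illustrative row-extraction loop over a VectorizedRowBatch.
Object[] row = new Object[outputTypeInfos.length];
for (int logical = 0; logical < batch.size; logical++) {
  final int batchIndex = (batch.selectedInUse ? batch.selected[logical] : logical);
  vectorExtractRow.extractRow(batch, batchIndex, row);
  // 'row' now holds one writable per output column, typed per outputObjectInspectors.
}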
use of org.apache.hadoop.hive.ql.exec.vector.VectorizationContext in project hive by apache.
the class Vectorizer method validateAggregationDesc.
private Pair<Boolean, Boolean> validateAggregationDesc(AggregationDesc aggDesc, ProcessingMode processingMode, boolean hasKeys) {
String udfName = aggDesc.getGenericUDAFName().toLowerCase();
if (!supportedAggregationUdfs.contains(udfName)) {
setExpressionIssue("Aggregation Function", "UDF " + udfName + " not supported");
return new Pair<Boolean, Boolean>(false, false);
}
if (aggDesc.getParameters() != null && !validateExprNodeDesc(aggDesc.getParameters(), "Aggregation Function UDF " + udfName + " parameter")) {
return new Pair<Boolean, Boolean>(false, false);
}
// See if we can vectorize the aggregation.
VectorizationContext vc = new ValidatorVectorizationContext(hiveConf);
VectorAggregateExpression vectorAggrExpr;
try {
vectorAggrExpr = vc.getAggregatorExpression(aggDesc);
} catch (Exception e) {
// getAggregatorExpression is not expected to fail here; record the issue if it does.
if (LOG.isDebugEnabled()) {
LOG.debug("Vectorization of aggregation should have succeeded ", e);
}
setExpressionIssue("Aggregation Function", "Vectorization of aggreation should have succeeded " + e);
return new Pair<Boolean, Boolean>(false, false);
}
if (LOG.isDebugEnabled()) {
LOG.debug("Aggregation " + aggDesc.getExprString() + " --> " + " vector expression " + vectorAggrExpr.toString());
}
ObjectInspector.Category outputCategory = aggregationOutputCategory(vectorAggrExpr);
boolean outputIsPrimitive = (outputCategory == ObjectInspector.Category.PRIMITIVE);
if (processingMode == ProcessingMode.MERGE_PARTIAL && hasKeys && !outputIsPrimitive) {
setOperatorIssue("Vectorized Reduce MergePartial GROUP BY keys can only handle aggregate outputs that are primitive types");
return new Pair<Boolean, Boolean>(false, false);
}
return new Pair<Boolean, Boolean>(true, outputIsPrimitive);
}
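A caller unpacks the returned pair as (vectorizable, outputIsPrimitive). A hypothetical sketch, assuming the Pair in scope exposes its components as the public left/right fields (as Calcite's Pair does); the caller shown is invented for illustration:
// Hypothetical caller: stop on the first aggregation that fails validation.
Pair<Boolean, Boolean> result = validateAggregationDesc(aggDesc, processingMode, hasKeys);
if (!result.left) {
  return false; // the specific issue was already recorded by the validator
}
boolean outputIsPrimitive = result.right;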
use of org.apache.hadoop.hive.ql.exec.vector.VectorizationContext in project hive by apache.
the class MapJoinTestConfig method createMapJoin.
public static MapJoinOperator createMapJoin(MapJoinTestDescription testDesc, Operator<? extends OperatorDesc> collectorOperator, MapJoinTestData testData, MapJoinDesc mapJoinDesc, boolean isVectorMapJoin, boolean isOriginalMapJoin) throws SerDeException, IOException, HiveException {
final Byte bigTablePos = 0;
MapJoinTableContainerSerDe mapJoinTableContainerSerDe = MapJoinTestConfig.createMapJoinTableContainerSerDe(mapJoinDesc);
MapJoinObjectSerDeContext valCtx = mapJoinTableContainerSerDe.getValueContext();
MapJoinTableContainer mapJoinTableContainer = (isOriginalMapJoin ? new HashMapWrapper(testDesc.hiveConf, -1) : new MapJoinBytesTableContainer(testDesc.hiveConf, valCtx, testData.smallTableKeyHashMap.size(), 0));
mapJoinTableContainer.setSerde(mapJoinTableContainerSerDe.getKeyContext(), mapJoinTableContainerSerDe.getValueContext());
loadTableContainerData(testDesc, testData, mapJoinTableContainer);
MapJoinOperator operator;
if (!isVectorMapJoin) {
operator = new MapJoinOperator(new CompilationOpContext());
operator.setConf(mapJoinDesc);
} else {
VectorizationContext vContext = new VectorizationContext("test", testDesc.bigTableColumnNamesList);
// Create scratch columns to hold small table results.
for (int i = 0; i < testDesc.smallTableValueTypeInfos.length; i++) {
vContext.allocateScratchColumn(testDesc.smallTableValueTypeInfos[i]);
}
// This is what the Vectorizer class does.
VectorMapJoinDesc vectorMapJoinDesc = new VectorMapJoinDesc();
byte posBigTable = (byte) mapJoinDesc.getPosBigTable();
VectorExpression[] allBigTableKeyExpressions = vContext.getVectorExpressions(mapJoinDesc.getKeys().get(posBigTable));
vectorMapJoinDesc.setAllBigTableKeyExpressions(allBigTableKeyExpressions);
Map<Byte, List<ExprNodeDesc>> exprs = mapJoinDesc.getExprs();
VectorExpression[] allBigTableValueExpressions = vContext.getVectorExpressions(exprs.get(posBigTable));
vectorMapJoinDesc.setAllBigTableValueExpressions(allBigTableValueExpressions);
List<ExprNodeDesc> bigTableFilters = mapJoinDesc.getFilters().get(bigTablePos);
boolean isOuterAndFiltered = (!mapJoinDesc.isNoOuterJoin() && bigTableFilters.size() > 0);
if (!isOuterAndFiltered) {
operator = new VectorMapJoinOperator(new CompilationOpContext(), mapJoinDesc, vContext, vectorMapJoinDesc);
} else {
operator = new VectorMapJoinOuterFilteredOperator(new CompilationOpContext(), mapJoinDesc, vContext, vectorMapJoinDesc);
}
}
MapJoinTestConfig.connectOperators(testDesc, operator, collectorOperator);
operator.setTestMapJoinTableContainer(1, mapJoinTableContainer, mapJoinTableContainerSerDe);
return operator;
}
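The scratch-column loop is the piece most likely to be reused elsewhere: each small-table value type gets a scratch column in the big-table batch. A minimal sketch, assuming allocateScratchColumn returns the allocated column index (the output array is illustrative):
// Illustrative: record where each small-table value will land in the batch.
int[] smallTableOutputColumns = new int[testDesc.smallTableValueTypeInfos.length];
for (int i = 0; i < testDesc.smallTableValueTypeInfos.length; i++) {
  smallTableOutputColumns[i] = vContext.allocateScratchColumn(testDesc.smallTableValueTypeInfos[i]);
}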
use of org.apache.hadoop.hive.ql.exec.vector.VectorizationContext in project hive by apache.
the class MapJoinTestConfig method createNativeVectorMapJoin.
public static MapJoinOperator createNativeVectorMapJoin(MapJoinTestDescription testDesc, Operator<? extends OperatorDesc> collectorOperator, MapJoinTestData testData, MapJoinDesc mapJoinDesc, HashTableImplementationType hashTableImplementationType) throws SerDeException, IOException, HiveException {
VectorMapJoinDesc vectorDesc = MapJoinTestConfig.createVectorMapJoinDesc(testDesc);
// UNDONE
mapJoinDesc.setVectorDesc(vectorDesc);
vectorDesc.setHashTableImplementationType(hashTableImplementationType);
VectorMapJoinInfo vectorMapJoinInfo = vectorDesc.getVectorMapJoinInfo();
MapJoinTableContainer mapJoinTableContainer;
switch(vectorDesc.getHashTableImplementationType()) {
case OPTIMIZED:
mapJoinTableContainer = new MapJoinBytesTableContainer(testDesc.hiveConf, null, testData.smallTableKeyHashMap.size(), 0);
MapJoinTableContainerSerDe mapJoinTableContainerSerDe = MapJoinTestConfig.createMapJoinTableContainerSerDe(mapJoinDesc);
mapJoinTableContainer.setSerde(mapJoinTableContainerSerDe.getKeyContext(), mapJoinTableContainerSerDe.getValueContext());
break;
case FAST:
mapJoinTableContainer = new VectorMapJoinFastTableContainer(mapJoinDesc, testDesc.hiveConf, testData.smallTableKeyHashMap.size());
break;
default:
throw new RuntimeException("Unexpected hash table implementation type " + vectorDesc.getHashTableImplementationType());
}
loadTableContainerData(testDesc, testData, mapJoinTableContainer);
VectorizationContext vContext = MapJoinTestConfig.createVectorizationContext(testDesc);
byte posBigTable = (byte) mapJoinDesc.getPosBigTable();
VectorExpression[] slimmedBigTableKeyExpressions = vContext.getVectorExpressions(mapJoinDesc.getKeys().get(posBigTable));
vectorMapJoinInfo.setSlimmedBigTableKeyExpressions(slimmedBigTableKeyExpressions);
Map<Byte, List<ExprNodeDesc>> exprs = mapJoinDesc.getExprs();
VectorExpression[] slimmedBigTableValueExpressions = vContext.getVectorExpressions(exprs.get(posBigTable));
vectorMapJoinInfo.setSlimmedBigTableValueExpressions(slimmedBigTableValueExpressions);
VectorMapJoinCommonOperator operator = MapJoinTestConfig.createNativeVectorMapJoinOperator(testDesc.vectorMapJoinVariation, mapJoinDesc, vectorDesc, vContext);
MapJoinTestConfig.connectOperators(testDesc, operator, collectorOperator);
operator.setTestMapJoinTableContainer(1, mapJoinTableContainer, null);
return operator;
}
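A hedged sketch of how a test could drive this factory for both hash table implementations; the loop and the comparison step are illustrative:
// Illustrative: build and exercise one native vector map join per implementation.
for (HashTableImplementationType type : new HashTableImplementationType[] {
    HashTableImplementationType.OPTIMIZED, HashTableImplementationType.FAST }) {
  MapJoinOperator op = MapJoinTestConfig.createNativeVectorMapJoin(
      testDesc, collectorOperator, testData, mapJoinDesc, type);
  // feed big-table batches through 'op', then compare the collector's output
}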