Use of org.apache.hadoop.hive.ql.exec.vector.VectorizationContext in project hive by apache.
From class TestVectorNegative, method doVectorArithmeticTest:
private void doVectorArithmeticTest(TypeInfo typeInfo, List<String> columns, String[] columnNames,
    TypeInfo[] typeInfos, DataTypePhysicalVariation[] dataTypePhysicalVariations,
    List<ExprNodeDesc> children, ExprNodeGenericFuncDesc exprDesc,
    NegativeTestMode negativeTestMode, VectorRandomBatchSource batchSource,
    ObjectInspector objectInspector, TypeInfo outputTypeInfo, Object[] resultObjects)
    throws Exception {
  HiveConf hiveConf = new HiveConf();
  if (negativeTestMode == NegativeTestMode.ADAPTOR) {
    hiveConf.setBoolVar(HiveConf.ConfVars.HIVE_TEST_VECTOR_ADAPTOR_OVERRIDE, true);
  }
  VectorizationContext vectorizationContext =
      new VectorizationContext("name", columns, Arrays.asList(typeInfos),
          Arrays.asList(dataTypePhysicalVariations), hiveConf);
  VectorExpression vectorExpression = vectorizationContext.getVectorExpression(exprDesc);
  vectorExpression.transientInit(hiveConf);
  if (negativeTestMode == NegativeTestMode.VECTOR_EXPRESSION
      && vectorExpression instanceof VectorUDFAdaptor) {
    System.out.println("*NO NATIVE VECTOR EXPRESSION* typeInfo " + typeInfo.toString()
        + " negativeTestMode " + negativeTestMode
        + " vectorExpression " + vectorExpression.toString());
  }
  String[] outputScratchTypeNames = vectorizationContext.getScratchColumnTypeNames();
  VectorizedRowBatchCtx batchContext = new VectorizedRowBatchCtx(
      columnNames,
      typeInfos,
      dataTypePhysicalVariations,
      /* dataColumnNums */ null,
      /* partitionColumnCount */ 0,
      /* virtualColumnCount */ 0,
      /* neededVirtualColumns */ null,
      outputScratchTypeNames,
      null);
  VectorizedRowBatch batch = batchContext.createVectorizedRowBatch();
  VectorExtractRow resultVectorExtractRow = new VectorExtractRow();
  resultVectorExtractRow.init(
      new TypeInfo[] { outputTypeInfo },
      new int[] { vectorExpression.getOutputColumnNum() });
  Object[] scratchRow = new Object[1];
  // Evaluate the compiled expression batch by batch and collect the results.
  batchSource.resetBatchIteration();
  int rowIndex = 0;
  while (true) {
    if (!batchSource.fillNextBatch(batch)) {
      break;
    }
    vectorExpression.evaluate(batch);
    extractResultObjects(batch, rowIndex, resultVectorExtractRow, scratchRow,
        objectInspector, resultObjects);
    rowIndex += batch.size;
  }
}
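The essential pattern here, stripped of the randomized test harness: build a VectorizationContext over the input schema, compile a row-mode ExprNodeDesc into a VectorExpression, and evaluate it against a VectorizedRowBatch. Below is a minimal, self-contained sketch of that pattern; the class name, the unary-minus expression, and the hand-built batch are illustrative, not taken from the test above, and it assumes a recent Hive where VectorExpression.transientInit takes a configuration.

import java.util.Arrays;
import java.util.List;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression;
import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPNegative;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;

public class NegateSketch {
  public static void main(String[] args) throws Exception {
    HiveConf hiveConf = new HiveConf();
    // Context over a single bigint input column named "col1".
    VectorizationContext vContext =
        new VectorizationContext("sketch", Arrays.asList("col1"));
    // Row-mode expression: -col1.
    List<ExprNodeDesc> children = Arrays.<ExprNodeDesc>asList(
        new ExprNodeColumnDesc(Long.class, "col1", "t", false));
    ExprNodeGenericFuncDesc negate = new ExprNodeGenericFuncDesc(
        TypeInfoFactory.longTypeInfo, new GenericUDFOPNegative(), children);
    VectorExpression expr = vContext.getVectorExpression(negate);
    expr.transientInit(hiveConf);
    // Hand-built batch: column 0 is the input, column 1 is the scratch column the
    // context allocated for the expression result.
    VectorizedRowBatch batch = new VectorizedRowBatch(2);
    batch.cols[0] = new LongColumnVector();
    batch.cols[1] = new LongColumnVector();
    LongColumnVector in = (LongColumnVector) batch.cols[0];
    in.vector[0] = 7;
    in.vector[1] = -3;
    batch.size = 2;
    expr.evaluate(batch);
    LongColumnVector out = (LongColumnVector) batch.cols[expr.getOutputColumnNum()];
    System.out.println(out.vector[0] + ", " + out.vector[1]); // expected: -7, 3
  }
}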
Use of org.apache.hadoop.hive.ql.exec.vector.VectorizationContext in project hive by apache.
From class Vectorizer, method validateAndVectorizeOperatorTree:
private Operator<? extends OperatorDesc> validateAndVectorizeOperatorTree(
    Operator<? extends OperatorDesc> nonVecRootOperator,
    boolean isReduce, boolean isTezOrSpark,
    VectorTaskColumnInfo vectorTaskColumnInfo) throws VectorizerCannotVectorizeException {
  VectorizationContext taskVContext =
      new VectorizationContext("Task",
          vectorTaskColumnInfo.allColumnNames,
          vectorTaskColumnInfo.allTypeInfos,
          vectorTaskColumnInfo.allDataTypePhysicalVariations,
          hiveConf);
  List<Operator<? extends OperatorDesc>> currentParentList = newOperatorList();
  currentParentList.add(nonVecRootOperator);
  // Start with a dummy vector operator as the parent of the parallel vector operator
  // tree we are creating.
  Operator<? extends OperatorDesc> dummyVectorOperator = new DummyVectorOperator(taskVContext);
  List<Operator<? extends OperatorDesc>> currentVectorParentList = newOperatorList();
  currentVectorParentList.add(dummyVectorOperator);
  delayedFixups.clear();
  do {
    List<Operator<? extends OperatorDesc>> nextParentList = newOperatorList();
    List<Operator<? extends OperatorDesc>> nextVectorParentList = newOperatorList();
    final int count = currentParentList.size();
    for (int i = 0; i < count; i++) {
      Operator<? extends OperatorDesc> parent = currentParentList.get(i);
      List<Operator<? extends OperatorDesc>> childrenList = parent.getChildOperators();
      if (childrenList == null || childrenList.size() == 0) {
        continue;
      }
      Operator<? extends OperatorDesc> vectorParent = currentVectorParentList.get(i);
      /*
       * Vectorize this parent's children. Plug them into vectorParent's children list.
       *
       * Add those children / vector children to nextParentList / nextVectorParentList.
       */
      doProcessChildren(parent, vectorParent, nextParentList, nextVectorParentList,
          isReduce, isTezOrSpark, vectorTaskColumnInfo);
    }
    currentParentList = nextParentList;
    currentVectorParentList = nextVectorParentList;
  } while (currentParentList.size() > 0);
  runDelayedFixups();
  return dummyVectorOperator;
}
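The loop above is a level-order (breadth-first) walk: it keeps the current frontier of the original operator tree and the matching frontier of the new vector tree in two parallel lists, so each vectorized child can be plugged in under the right vectorized parent. A stripped-down sketch of that traversal shape, using plain hypothetical node types instead of Hive operators:

import java.util.ArrayList;
import java.util.Collections;
import java.util.List;

class Node {
  final List<Node> children = new ArrayList<>();
}

class TreeMirror {
  // Mirror `root` level by level; dummyRoot plays the role of DummyVectorOperator.
  static Node mirror(Node root) {
    Node dummyRoot = new Node();
    List<Node> parents = new ArrayList<>(Collections.singletonList(root));
    List<Node> mirroredParents = new ArrayList<>(Collections.singletonList(dummyRoot));
    while (!parents.isEmpty()) {
      List<Node> nextParents = new ArrayList<>();
      List<Node> nextMirrored = new ArrayList<>();
      for (int i = 0; i < parents.size(); i++) {
        for (Node child : parents.get(i).children) {
          Node mirroredChild = new Node(); // stands in for the vectorized operator
          mirroredParents.get(i).children.add(mirroredChild);
          nextParents.add(child);
          nextMirrored.add(mirroredChild);
        }
      }
      parents = nextParents;
      mirroredParents = nextMirrored;
    }
    return dummyRoot;
  }
}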
Use of org.apache.hadoop.hive.ql.exec.vector.VectorizationContext in project hive by apache.
From class TestVectorPTFGroupBatches, method getFakeOperator:
private VectorPTFOperator getFakeOperator() throws HiveException {
  VectorPTFDesc vectorPTFDesc = new VectorPTFDesc();
  vectorPTFDesc.setVectorPTFInfo(new VectorPTFInfo());
  vectorPTFDesc.setOutputColumnNames(new String[0]);
  vectorPTFDesc.setEvaluatorFunctionNames(new String[0]);
  return new VectorPTFOperator(
      new CompilationOpContext(), new PTFDesc(), new VectorizationContext("fake"),
      vectorPTFDesc);
}
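Taken together, the usages on this page exercise three constructor shapes of VectorizationContext. A sketch collecting them in one place (the parameter values are placeholders, assumed to be supplied by the caller):

import java.util.Arrays;
import java.util.List;
import org.apache.hadoop.hive.common.type.DataTypePhysicalVariation;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;

class ContextShapes {
  static void show(List<String> columns, TypeInfo[] typeInfos,
      DataTypePhysicalVariation[] variations, HiveConf hiveConf) {
    // Name only: enough when nothing has to resolve against real input columns,
    // as in the fake PTF operator above.
    VectorizationContext byName = new VectorizationContext("fake");
    // Name plus input column names: column indices resolve by position in the list.
    VectorizationContext byColumns = new VectorizationContext("name", columns);
    // Full schema plus HiveConf: names, TypeInfos, physical variations, configuration.
    VectorizationContext full = new VectorizationContext("name", columns,
        Arrays.asList(typeInfos), Arrays.asList(variations), hiveConf);
  }
}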
Use of org.apache.hadoop.hive.ql.exec.vector.VectorizationContext in project hive by apache.
From class TestVectorizer, method testAggregateOnUDF:
@Test
public void testAggregateOnUDF() throws HiveException, VectorizerCannotVectorizeException {
  ExprNodeColumnDesc colExprA = new ExprNodeColumnDesc(Integer.class, "col1", "T", false);
  ExprNodeColumnDesc colExprB = new ExprNodeColumnDesc(Integer.class, "col2", "T", false);
  List<ExprNodeDesc> children = new ArrayList<ExprNodeDesc>();
  children.add(colExprA);
  ExprNodeGenericFuncDesc exprNodeDesc =
      new ExprNodeGenericFuncDesc(TypeInfoFactory.intTypeInfo, new GenericUDFAbs(), children);
  ArrayList<ExprNodeDesc> params = new ArrayList<ExprNodeDesc>();
  params.add(exprNodeDesc);
  List<ObjectInspector> paramOIs = new ArrayList<ObjectInspector>();
  paramOIs.add(exprNodeDesc.getWritableObjectInspector());
  AggregationDesc aggDesc = new AggregationDesc("sum",
      FunctionRegistry.getGenericUDAFEvaluator("sum", paramOIs, false, false),
      params, false, GenericUDAFEvaluator.Mode.PARTIAL1);
  ArrayList<String> outputColumnNames = new ArrayList<String>();
  outputColumnNames.add("_col0");
  GroupByDesc desc = new GroupByDesc();
  VectorGroupByDesc vectorDesc = new VectorGroupByDesc();
  vectorDesc.setProcessingMode(ProcessingMode.HASH);
  // Seed the descriptor with a placeholder aggregation class (VectorUDAFCountStar);
  // vectorizeGroupByOperator is expected to replace it with the real one.
  vectorDesc.setVecAggrDescs(new VectorAggregationDesc[] {
      new VectorAggregationDesc(aggDesc.getGenericUDAFName(),
          new GenericUDAFSum.GenericUDAFSumLong(), aggDesc.getMode(),
          TypeInfoFactory.longTypeInfo, ColumnVector.Type.LONG, null,
          TypeInfoFactory.longTypeInfo, ColumnVector.Type.LONG,
          VectorUDAFCountStar.class) });
  desc.setOutputColumnNames(outputColumnNames);
  ArrayList<AggregationDesc> aggDescList = new ArrayList<AggregationDesc>();
  aggDescList.add(aggDesc);
  desc.setAggregators(aggDescList);
  ArrayList<ExprNodeDesc> grpByKeys = new ArrayList<ExprNodeDesc>();
  grpByKeys.add(colExprB);
  desc.setKeys(grpByKeys);
  Operator<? extends OperatorDesc> gbyOp = OperatorFactory.get(new CompilationOpContext(), desc);
  desc.setMode(GroupByDesc.Mode.HASH);
  VectorizationContext ctx =
      new VectorizationContext("name", Arrays.asList(new String[] { "col1", "col2" }));
  Vectorizer v = new Vectorizer();
  v.testSetCurrentBaseWork(new MapWork());
  VectorGroupByOperator vectorOp =
      (VectorGroupByOperator) Vectorizer.vectorizeGroupByOperator(gbyOp, ctx, vectorDesc);
  Assert.assertEquals(VectorUDAFSumLong.class, vectorDesc.getVecAggrDescs()[0].getVecAggrClass());
}
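The interesting half of this test is the aggregation parameter: sum() is fed a UDF expression, abs(col1), rather than a bare column. A minimal, self-contained sketch (the class name and the printed expression class are illustrative) of compiling that same parameter expression through a VectorizationContext:

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext;
import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression;
import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFAbs;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;

public class AbsParamSketch {
  public static void main(String[] args) throws Exception {
    // Same shape as the test: a context over two int columns, and abs(col1).
    VectorizationContext ctx =
        new VectorizationContext("sketch", Arrays.asList("col1", "col2"));
    List<ExprNodeDesc> children = new ArrayList<>();
    children.add(new ExprNodeColumnDesc(Integer.class, "col1", "T", false));
    ExprNodeGenericFuncDesc abs = new ExprNodeGenericFuncDesc(
        TypeInfoFactory.intTypeInfo, new GenericUDFAbs(), children);
    VectorExpression expr = ctx.getVectorExpression(abs);
    // For an int column this should resolve to a native expression
    // (e.g. FuncAbsLongToLong) rather than a VectorUDFAdaptor.
    System.out.println(expr.getClass().getSimpleName());
  }
}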
Use of org.apache.hadoop.hive.ql.exec.vector.VectorizationContext in project hive by apache.
From class TestVectorizer, method setUp:
@Before
public void setUp() {
  List<String> columns = new ArrayList<String>();
  columns.add("col0");
  columns.add("col1");
  columns.add("col2");
  columns.add("col3");
  // Create the VectorizationContext the tests use to generate vectorized expressions.
  vContext = new VectorizationContext("name", columns);
}
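As a hypothetical follow-on, a test built on this setUp() can hand vContext any ExprNodeDesc over col0..col3; a bare column reference is the simplest case. The sketch below is not from TestVectorizer, and the expectation that projecting a plain column compiles to an IdentityExpression is an assumption about the context's behavior:

@Test
public void testColumnProjection() throws HiveException {
  // Hypothetical test using the vContext built in setUp():
  // compile a bare column reference in projection mode.
  ExprNodeColumnDesc col2 = new ExprNodeColumnDesc(Integer.class, "col2", "T", false);
  VectorExpression expr = vContext.getVectorExpression(col2);
  // expr wraps the batch index of "col2"; expected to be an IdentityExpression.
  Assert.assertNotNull(expr);
}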