use of org.apache.hadoop.hive.ql.udf.generic.GenericUDFIf in project hive by apache.
the class TestVectorIfStatement method doVectorIfTest.
private void doVectorIfTest(TypeInfo typeInfo, IfVariation ifVariation, List<String> columns, String[] columnNames, TypeInfo[] typeInfos, DataTypePhysicalVariation[] dataTypePhysicalVariations, List<ExprNodeDesc> children, IfStmtTestMode ifStmtTestMode, ColumnScalarMode columnScalarMode, VectorRandomBatchSource batchSource, Object[] resultObjects) throws Exception {
final boolean isFilter = ifVariation.isFilter;
GenericUDF udf;
switch(ifStmtTestMode) {
case VECTOR_EXPRESSION:
udf = new GenericUDFIf();
break;
case ADAPTOR_WHEN:
udf = new GenericUDFWhen();
break;
default:
throw new RuntimeException("Unexpected IF statement test mode " + ifStmtTestMode);
}
ExprNodeGenericFuncDesc exprDesc = new ExprNodeGenericFuncDesc(typeInfo, udf, children);
String ifExprMode = (ifStmtTestMode != IfStmtTestMode.VECTOR_EXPRESSION ? "adaptor" : "good");
HiveConf hiveConf = new HiveConf();
hiveConf.setVar(HiveConf.ConfVars.HIVE_VECTORIZED_IF_EXPR_MODE, ifExprMode);
VectorizationContext vectorizationContext = new VectorizationContext("name", columns, Arrays.asList(typeInfos), Arrays.asList(dataTypePhysicalVariations), hiveConf);
VectorExpression vectorExpression = vectorizationContext.getVectorExpression(exprDesc, (isFilter ? VectorExpressionDescriptor.Mode.FILTER : VectorExpressionDescriptor.Mode.PROJECTION));
final TypeInfo outputTypeInfo;
final ObjectInspector objectInspector;
if (!isFilter) {
outputTypeInfo = vectorExpression.getOutputTypeInfo();
objectInspector = TypeInfoUtils.getStandardWritableObjectInspectorFromTypeInfo(outputTypeInfo);
} else {
outputTypeInfo = null;
objectInspector = null;
}
if (ifStmtTestMode == IfStmtTestMode.VECTOR_EXPRESSION && vectorExpression instanceof VectorUDFAdaptor) {
System.out.println("*NO NATIVE VECTOR EXPRESSION* typeInfo " + typeInfo.toString() + " ifStmtTestMode " + ifStmtTestMode + " ifVariation " + ifVariation + " columnScalarMode " + columnScalarMode + " vectorExpression " + vectorExpression.toString());
}
String[] outputScratchTypeNames = vectorizationContext.getScratchColumnTypeNames();
DataTypePhysicalVariation[] outputDataTypePhysicalVariations = vectorizationContext.getScratchDataTypePhysicalVariations();
VectorizedRowBatchCtx batchContext = new VectorizedRowBatchCtx(columnNames, typeInfos, dataTypePhysicalVariations, /* dataColumnNums */
null, /* partitionColumnCount */
0, /* virtualColumnCount */
0, /* neededVirtualColumns */
null, outputScratchTypeNames, outputDataTypePhysicalVariations);
VectorizedRowBatch batch = batchContext.createVectorizedRowBatch();
// System.out.println("*VECTOR EXPRESSION* " + vectorExpression.getClass().getSimpleName());
/*
System.out.println(
"*DEBUG* typeInfo " + typeInfo.toString() +
" ifStmtTestMode " + ifStmtTestMode +
" ifVariation " + ifVariation +
" columnScalarMode " + columnScalarMode +
" vectorExpression " + vectorExpression.toString());
*/
VectorExtractRow resultVectorExtractRow = null;
Object[] scrqtchRow = null;
if (!isFilter) {
resultVectorExtractRow = new VectorExtractRow();
final int outputColumnNum = vectorExpression.getOutputColumnNum();
resultVectorExtractRow.init(new TypeInfo[] { outputTypeInfo }, new int[] { outputColumnNum });
scrqtchRow = new Object[1];
}
boolean copySelectedInUse = false;
int[] copySelected = new int[VectorizedRowBatch.DEFAULT_SIZE];
batchSource.resetBatchIteration();
int rowIndex = 0;
while (true) {
if (!batchSource.fillNextBatch(batch)) {
break;
}
final int originalBatchSize = batch.size;
if (isFilter) {
copySelectedInUse = batch.selectedInUse;
if (batch.selectedInUse) {
System.arraycopy(batch.selected, 0, copySelected, 0, originalBatchSize);
}
}
// In filter mode, the batch size can be made smaller.
vectorExpression.evaluate(batch);
if (!isFilter) {
extractResultObjects(batch, rowIndex, resultVectorExtractRow, scrqtchRow, objectInspector, resultObjects);
} else {
final int currentBatchSize = batch.size;
if (copySelectedInUse && batch.selectedInUse) {
int selectIndex = 0;
for (int i = 0; i < originalBatchSize; i++) {
final int originalBatchIndex = copySelected[i];
final boolean booleanResult;
if (selectIndex < currentBatchSize && batch.selected[selectIndex] == originalBatchIndex) {
booleanResult = true;
selectIndex++;
} else {
booleanResult = false;
}
resultObjects[rowIndex + i] = new BooleanWritable(booleanResult);
}
} else if (batch.selectedInUse) {
int selectIndex = 0;
for (int i = 0; i < originalBatchSize; i++) {
final boolean booleanResult;
if (selectIndex < currentBatchSize && batch.selected[selectIndex] == i) {
booleanResult = true;
selectIndex++;
} else {
booleanResult = false;
}
resultObjects[rowIndex + i] = new BooleanWritable(booleanResult);
}
} else if (currentBatchSize == 0) {
// Whole batch got zapped.
for (int i = 0; i < originalBatchSize; i++) {
resultObjects[rowIndex + i] = new BooleanWritable(false);
}
} else {
// Every row kept.
for (int i = 0; i < originalBatchSize; i++) {
resultObjects[rowIndex + i] = new BooleanWritable(true);
}
}
}
rowIndex += originalBatchSize;
}
}
use of org.apache.hadoop.hive.ql.udf.generic.GenericUDFIf in project hive by apache.
the class TestVectorizationContext method testIfConditionalExprs.
/**
* Test that correct VectorExpression classes are chosen for the
* IF (expr1, expr2, expr3) conditional expression for integer, float,
* boolean, timestamp and string input types. expr1 is always an input column expression
* of type long. expr2 and expr3 can be column expressions or constants of other types
* but must have the same type.
*/
@Test
public void testIfConditionalExprs() throws HiveException {
// Predicate.
ExprNodeColumnDesc col1Expr = new ExprNodeColumnDesc(Boolean.class, "col1", "table", false);
ExprNodeColumnDesc col2Expr = new ExprNodeColumnDesc(Long.class, "col2", "table", false);
ExprNodeColumnDesc col3Expr = new ExprNodeColumnDesc(Long.class, "col3", "table", false);
ExprNodeConstantDesc constDesc2 = new ExprNodeConstantDesc(Integer.valueOf(1));
ExprNodeConstantDesc constDesc3 = new ExprNodeConstantDesc(Integer.valueOf(2));
// long column/column IF
GenericUDFIf udf = new GenericUDFIf();
List<ExprNodeDesc> children1 = new ArrayList<ExprNodeDesc>();
children1.add(col1Expr);
children1.add(col2Expr);
children1.add(col3Expr);
ExprNodeGenericFuncDesc exprDesc = new ExprNodeGenericFuncDesc(TypeInfoFactory.booleanTypeInfo, udf, children1);
List<String> columns = new ArrayList<String>();
columns.add("col0");
columns.add("col1");
columns.add("col2");
columns.add("col3");
VectorizationContext vc = new VectorizationContext("name", columns);
exprDesc.setTypeInfo(TypeInfoFactory.longTypeInfo);
VectorExpression ve = vc.getVectorExpression(exprDesc);
assertTrue(ve instanceof IfExprLongColumnLongColumn);
// long column/scalar IF
children1.set(2, new ExprNodeConstantDesc(1L));
ve = vc.getVectorExpression(exprDesc);
assertTrue(ve instanceof IfExprLongColumnLongScalar);
// long scalar/scalar IF
children1.set(1, new ExprNodeConstantDesc(1L));
ve = vc.getVectorExpression(exprDesc);
assertTrue(ve instanceof IfExprLongScalarLongScalar);
// long scalar/column IF
children1.set(2, col3Expr);
ve = vc.getVectorExpression(exprDesc);
assertTrue(ve instanceof IfExprLongScalarLongColumn);
// test for double type
col2Expr = new ExprNodeColumnDesc(Double.class, "col2", "table", false);
col3Expr = new ExprNodeColumnDesc(Double.class, "col3", "table", false);
// double column/column IF
children1.set(1, col2Expr);
children1.set(2, col3Expr);
exprDesc.setTypeInfo(TypeInfoFactory.doubleTypeInfo);
ve = vc.getVectorExpression(exprDesc);
assertTrue(ve instanceof IfExprDoubleColumnDoubleColumn);
// double column/scalar IF
children1.set(2, new ExprNodeConstantDesc(1D));
ve = vc.getVectorExpression(exprDesc);
assertTrue(ve instanceof IfExprDoubleColumnDoubleScalar);
// double scalar/scalar IF
children1.set(1, new ExprNodeConstantDesc(1D));
ve = vc.getVectorExpression(exprDesc);
assertTrue(ve instanceof IfExprDoubleScalarDoubleScalar);
// double scalar/column IF
children1.set(2, col3Expr);
ve = vc.getVectorExpression(exprDesc);
assertTrue(ve instanceof IfExprDoubleScalarDoubleColumn);
// double scalar/long column IF
children1.set(2, new ExprNodeColumnDesc(Long.class, "col3", "table", false));
ve = vc.getVectorExpression(exprDesc);
assertTrue(ve instanceof IfExprColumnCondExpr);
// Additional combinations of (long,double)X(column,scalar) for each of the second
// and third arguments are omitted. We have coverage of all the source templates
// already.
// test for timestamp type
col2Expr = new ExprNodeColumnDesc(Timestamp.class, "col2", "table", false);
col3Expr = new ExprNodeColumnDesc(Timestamp.class, "col3", "table", false);
// timestamp column/column IF
children1.set(1, col2Expr);
children1.set(2, col3Expr);
exprDesc.setTypeInfo(TypeInfoFactory.timestampTypeInfo);
ve = vc.getVectorExpression(exprDesc);
assertTrue(ve instanceof IfExprTimestampColumnColumn);
// timestamp column/scalar IF where scalar is really a CAST of a constant to timestamp.
ExprNodeGenericFuncDesc f = new ExprNodeGenericFuncDesc();
f.setGenericUDF(new GenericUDFTimestamp());
f.setTypeInfo(TypeInfoFactory.timestampTypeInfo);
List<ExprNodeDesc> children2 = new ArrayList<ExprNodeDesc>();
f.setChildren(children2);
children2.add(new ExprNodeConstantDesc("2013-11-05 00:00:00.000"));
children1.set(2, f);
ve = vc.getVectorExpression(exprDesc);
assertTrue(ve instanceof IfExprTimestampColumnScalar);
// timestamp scalar/scalar
children1.set(1, f);
ve = vc.getVectorExpression(exprDesc);
assertTrue(ve instanceof IfExprTimestampScalarScalar);
// timestamp scalar/column
children1.set(2, col3Expr);
ve = vc.getVectorExpression(exprDesc);
assertTrue(ve instanceof IfExprTimestampScalarColumn);
// test for boolean type
col2Expr = new ExprNodeColumnDesc(Boolean.class, "col2", "table", false);
col3Expr = new ExprNodeColumnDesc(Boolean.class, "col3", "table", false);
// column/column
children1.set(1, col2Expr);
children1.set(2, col3Expr);
exprDesc.setTypeInfo(TypeInfoFactory.booleanTypeInfo);
ve = vc.getVectorExpression(exprDesc);
assertTrue(ve instanceof IfExprLongColumnLongColumn);
// column/scalar IF
children1.set(2, new ExprNodeConstantDesc(true));
ve = vc.getVectorExpression(exprDesc);
assertTrue(ve instanceof IfExprLongColumnLongScalar);
// scalar/scalar IF
children1.set(1, new ExprNodeConstantDesc(true));
ve = vc.getVectorExpression(exprDesc);
assertTrue(ve instanceof IfExprLongScalarLongScalar);
// scalar/column IF
children1.set(2, col3Expr);
ve = vc.getVectorExpression(exprDesc);
assertTrue(ve instanceof IfExprLongScalarLongColumn);
// test for string type
constDesc2 = new ExprNodeConstantDesc("Alpha");
constDesc3 = new ExprNodeConstantDesc("Bravo");
col2Expr = new ExprNodeColumnDesc(String.class, "col2", "table", false);
col3Expr = new ExprNodeColumnDesc(String.class, "col3", "table", false);
// column/column
children1.set(1, col2Expr);
children1.set(2, col3Expr);
exprDesc.setTypeInfo(TypeInfoFactory.stringTypeInfo);
ve = vc.getVectorExpression(exprDesc);
assertTrue(ve instanceof IfExprStringGroupColumnStringGroupColumn);
// column/scalar
children1.set(2, constDesc3);
ve = vc.getVectorExpression(exprDesc);
assertTrue(ve instanceof IfExprStringGroupColumnStringScalar);
// scalar/scalar
children1.set(1, constDesc2);
ve = vc.getVectorExpression(exprDesc);
assertTrue(ve instanceof IfExprStringScalarStringScalar);
// scalar/column
children1.set(2, col3Expr);
ve = vc.getVectorExpression(exprDesc);
assertTrue(ve instanceof IfExprStringScalarStringGroupColumn);
// test for CHAR type
CharTypeInfo charTypeInfo = new CharTypeInfo(10);
constDesc2 = new ExprNodeConstantDesc(charTypeInfo, new HiveChar("Alpha", 10));
constDesc3 = new ExprNodeConstantDesc(charTypeInfo, new HiveChar("Bravo", 10));
col2Expr = new ExprNodeColumnDesc(charTypeInfo, "col2", "table", false);
col3Expr = new ExprNodeColumnDesc(charTypeInfo, "col3", "table", false);
// column/column
children1.set(1, col2Expr);
children1.set(2, col3Expr);
ve = vc.getVectorExpression(exprDesc);
exprDesc.setTypeInfo(charTypeInfo);
assertTrue(ve instanceof IfExprCondExprCondExpr);
// column/scalar
children1.set(2, constDesc3);
ve = vc.getVectorExpression(exprDesc);
assertTrue(ve instanceof IfExprStringGroupColumnCharScalar);
// scalar/scalar
children1.set(1, constDesc2);
// ve = vc.getVectorExpression(exprDesc);
// assertTrue(ve instanceof IfExprCharScalarCharScalar);
// scalar/column
children1.set(2, col3Expr);
ve = vc.getVectorExpression(exprDesc);
assertTrue(ve instanceof IfExprCharScalarStringGroupColumn);
// test for VARCHAR type
VarcharTypeInfo varcharTypeInfo = new VarcharTypeInfo(10);
constDesc2 = new ExprNodeConstantDesc(varcharTypeInfo, new HiveVarchar("Alpha", 10));
constDesc3 = new ExprNodeConstantDesc(varcharTypeInfo, new HiveVarchar("Bravo", 10));
col2Expr = new ExprNodeColumnDesc(varcharTypeInfo, "col2", "table", false);
col3Expr = new ExprNodeColumnDesc(varcharTypeInfo, "col3", "table", false);
// column/column
children1.set(1, col2Expr);
children1.set(2, col3Expr);
exprDesc.setTypeInfo(varcharTypeInfo);
ve = vc.getVectorExpression(exprDesc);
assertTrue(ve instanceof IfExprStringGroupColumnStringGroupColumn);
// column/scalar
children1.set(2, constDesc3);
ve = vc.getVectorExpression(exprDesc);
assertTrue(ve instanceof IfExprStringGroupColumnVarCharScalar);
// scalar/scalar
children1.set(1, constDesc2);
// ve = vc.getVectorExpression(exprDesc);
// assertTrue(ve instanceof IfExprVarCharScalarVarCharScalar);
// scalar/column
children1.set(2, col3Expr);
ve = vc.getVectorExpression(exprDesc);
assertTrue(ve instanceof IfExprVarCharScalarStringGroupColumn);
}
use of org.apache.hadoop.hive.ql.udf.generic.GenericUDFIf in project hive by apache.
the class TestVectorIfStatement method doRowIfTest.
private void doRowIfTest(TypeInfo typeInfo, List<String> columns, List<ExprNodeDesc> children, Object[][] randomRows, ObjectInspector rowInspector, Object[] resultObjects) throws Exception {
GenericUDF udf = new GenericUDFIf();
ExprNodeGenericFuncDesc exprDesc = new ExprNodeGenericFuncDesc(typeInfo, udf, children);
HiveConf hiveConf = new HiveConf();
ExprNodeEvaluator evaluator = ExprNodeEvaluatorFactory.get(exprDesc, hiveConf);
evaluator.initialize(rowInspector);
final int rowCount = randomRows.length;
for (int i = 0; i < rowCount; i++) {
Object[] row = randomRows[i];
Object result = evaluator.evaluate(row);
resultObjects[i] = result;
}
}
Aggregations