use of org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc in project hive by apache.
the class TestVectorizationContext method testVectorizeAndOrProjectionExpression.
/**
 * Builds {@code (col1 > 10) AND col2} and {@code (col1 > 10) OR col2} and checks which vector
 * expression classes the {@link VectorizationContext} produces for each of them, first in
 * FILTER mode and then in PROJECTION mode.
 *
 * @throws HiveException propagated from the calls made while building the vector expressions
 */
@Test
public void testVectorizeAndOrProjectionExpression() throws HiveException {
  // Left operand shared by AND and OR: col1 > 10.
  ExprNodeColumnDesc col1Expr = new ExprNodeColumnDesc(Integer.class, "col1", "table", false);
  ExprNodeConstantDesc constDesc = new ExprNodeConstantDesc(Integer.valueOf(10));
  GenericUDFOPGreaterThan udf = new GenericUDFOPGreaterThan();
  ExprNodeGenericFuncDesc greaterExprDesc = new ExprNodeGenericFuncDesc();
  greaterExprDesc.setTypeInfo(TypeInfoFactory.booleanTypeInfo);
  greaterExprDesc.setGenericUDF(udf);
  List<ExprNodeDesc> children1 = new ArrayList<ExprNodeDesc>(2);
  children1.add(col1Expr);
  children1.add(constDesc);
  greaterExprDesc.setChildren(children1);

  // Right operand shared by AND and OR: the boolean column col2.
  ExprNodeColumnDesc col2Expr = new ExprNodeColumnDesc(Boolean.class, "col2", "table", false);

  // AND
  GenericUDFOPAnd andUdf = new GenericUDFOPAnd();
  ExprNodeGenericFuncDesc andExprDesc = new ExprNodeGenericFuncDesc();
  andExprDesc.setTypeInfo(TypeInfoFactory.booleanTypeInfo);
  andExprDesc.setGenericUDF(andUdf);
  List<ExprNodeDesc> children3 = new ArrayList<ExprNodeDesc>(2);
  children3.add(greaterExprDesc);
  children3.add(col2Expr);
  andExprDesc.setChildren(children3);

  List<String> columns = new ArrayList<String>();
  columns.add("col1");
  columns.add("col2");
  VectorizationContext vc = new VectorizationContext("name", columns);

  VectorExpression veAnd = vc.getVectorExpression(andExprDesc, VectorExpressionDescriptor.Mode.FILTER);
  assertEquals(FilterExprAndExpr.class, veAnd.getClass());
  assertEquals(FilterLongColGreaterLongScalar.class, veAnd.getChildExpressions()[0].getClass());
  assertEquals(SelectColumnIsTrue.class, veAnd.getChildExpressions()[1].getClass());

  veAnd = vc.getVectorExpression(andExprDesc, VectorExpressionDescriptor.Mode.PROJECTION);
  assertEquals(ColAndCol.class, veAnd.getClass());
  assertEquals(1, veAnd.getChildExpressions().length);
  assertEquals(LongColGreaterLongScalar.class, veAnd.getChildExpressions()[0].getClass());
  // NOTE(review): presumably columns 0-1 are the inputs and 2 is the scratch column of the
  // comparison, which makes 3 the output of the AND -- confirm against VectorizationContext.
  assertEquals(3, ((ColAndCol) veAnd).getOutputColumnNum());

  // OR
  GenericUDFOPOr orUdf = new GenericUDFOPOr();
  ExprNodeGenericFuncDesc orExprDesc = new ExprNodeGenericFuncDesc();
  orExprDesc.setTypeInfo(TypeInfoFactory.booleanTypeInfo);
  orExprDesc.setGenericUDF(orUdf);
  List<ExprNodeDesc> children4 = new ArrayList<ExprNodeDesc>(2);
  children4.add(greaterExprDesc);
  children4.add(col2Expr);
  orExprDesc.setChildren(children4);

  // Allocate new Vectorization context to reset the intermediate columns.
  vc = new VectorizationContext("name", columns);

  VectorExpression veOr = vc.getVectorExpression(orExprDesc, VectorExpressionDescriptor.Mode.FILTER);
  assertEquals(FilterExprOrExpr.class, veOr.getClass());
  assertEquals(FilterLongColGreaterLongScalar.class, veOr.getChildExpressions()[0].getClass());
  assertEquals(SelectColumnIsTrue.class, veOr.getChildExpressions()[1].getClass());

  veOr = vc.getVectorExpression(orExprDesc, VectorExpressionDescriptor.Mode.PROJECTION);
  assertEquals(ColOrCol.class, veOr.getClass());
  // Inspect the OR projection itself; these two checks previously re-tested veAnd.
  assertEquals(1, veOr.getChildExpressions().length);
  assertEquals(LongColGreaterLongScalar.class, veOr.getChildExpressions()[0].getClass());
  assertEquals(3, ((ColOrCol) veOr).getOutputColumnNum());
}
use of org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc in project hive by apache.
the class TestVectorizationContext method testStringFilterExpressions.
/**
 * Checks that {@code col1 > "Alpha"}, with {@code col1} declared as a String column, is
 * vectorized in FILTER mode to a {@link FilterStringGroupColGreaterStringScalar}.
 *
 * @throws HiveException propagated from building the vector expression
 */
@Test
public void testStringFilterExpressions() throws HiveException {
  // Operands of the comparison: the String column and the String literal.
  List<ExprNodeDesc> operands = new ArrayList<>(2);
  operands.add(new ExprNodeColumnDesc(String.class, "col1", "table", false));
  operands.add(new ExprNodeConstantDesc("Alpha"));

  ExprNodeGenericFuncDesc greaterThanDesc = new ExprNodeGenericFuncDesc();
  greaterThanDesc.setGenericUDF(new GenericUDFOPGreaterThan());
  greaterThanDesc.setChildren(operands);

  // The context knows three columns; only col1 is referenced by the expression.
  List<String> inputColumns = new ArrayList<>();
  for (String columnName : new String[] {"col0", "col1", "col2"}) {
    inputColumns.add(columnName);
  }

  VectorizationContext context = new VectorizationContext("name", inputColumns);
  VectorExpression filter =
      context.getVectorExpression(greaterThanDesc, VectorExpressionDescriptor.Mode.FILTER);
  assertTrue(filter instanceof FilterStringGroupColGreaterStringScalar);
}
use of org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc in project hive by apache.
the class TestVectorizationContext method testFloatInExpressions.
/**
 * Builds {@code col1 + 10} with {@code col1} declared as a Float column and the expression
 * typed as double, then checks that the PROJECTION-mode vector expression reports a double
 * output type.
 *
 * @throws HiveException propagated from building the vector expression
 */
@Test
public void testFloatInExpressions() throws HiveException {
  // Operands of the addition: the Float column and the Integer literal 10.
  List<ExprNodeDesc> plusOperands = new ArrayList<>(2);
  plusOperands.add(new ExprNodeColumnDesc(Float.class, "col1", "table", false));
  plusOperands.add(new ExprNodeConstantDesc(Integer.valueOf(10)));

  ExprNodeGenericFuncDesc plusDesc =
      new ExprNodeGenericFuncDesc(TypeInfoFactory.doubleTypeInfo, new GenericUDFOPPlus(), plusOperands);

  List<String> inputColumns = new ArrayList<>();
  inputColumns.add("col1");

  VectorizationContext context = new VectorizationContext("name", inputColumns);
  VectorExpression projection =
      context.getVectorExpression(plusDesc, VectorExpressionDescriptor.Mode.PROJECTION);
  assertTrue(projection.getOutputTypeInfo().equals(TypeInfoFactory.doubleTypeInfo));
}
use of org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc in project hive by apache.
the class TestVectorBetweenIn method doBetweenInVariation.
/**
 * Builds one BETWEEN / NOT BETWEEN / IN expression over a single column of the given type and
 * hands it, together with randomly generated rows, to {@code executeTestModesAndVerify}.
 *
 * <p>A pool of 10 to 19 random non-null values is drawn for the column. Depending on
 * {@code subVariation}, some of those values become the comparison constants (range limits for
 * BETWEEN, list members for IN).
 *
 * @param random source of randomness; the order in which it is consumed is significant for
 *     reproducibility
 * @param typeName type string parsed with {@code TypeInfoUtils.getTypeInfoFromTypeString}
 * @param tryDecimal64 forwarded to {@code checkDecimal64} to decide whether the DECIMAL_64
 *     physical variation is used
 * @param betweenInVariation which of the BETWEEN / NOT BETWEEN / IN, FILTER / PROJECTION forms
 *     to build
 * @param subVariation selects how the comparison constants are chosen; BETWEEN handles 0-6 and
 *     IN handles 0-3
 * @return {@code false} when {@code subVariation} is not handled for the chosen form, otherwise
 *     the result of {@code executeTestModesAndVerify}
 * @throws Exception propagated from the helpers called here
 */
private boolean doBetweenInVariation(Random random, String typeName, boolean tryDecimal64, BetweenInVariation betweenInVariation, int subVariation) throws Exception {
  TypeInfo typeInfo = TypeInfoUtils.getTypeInfoFromTypeString(typeName);
  boolean isDecimal64 = checkDecimal64(tryDecimal64, typeInfo);
  DataTypePhysicalVariation dataTypePhysicalVariation = (isDecimal64 ? DataTypePhysicalVariation.DECIMAL_64 : DataTypePhysicalVariation.NONE);

  // ----------------------------------------------------------------------------------------------
  ObjectInspector objectInspector = TypeInfoUtils.getStandardWritableObjectInspectorFromTypeInfo(typeInfo);

  // Pool of candidate column values; always at least 10, so indexes 5 and 6 used below exist.
  final int valueCount = 10 + random.nextInt(10);
  List<Object> valueList = new ArrayList<Object>(valueCount);
  for (int i = 0; i < valueCount; i++) {
    valueList.add(VectorRandomRowSource.randomWritable(random, typeInfo, objectInspector, dataTypePhysicalVariation, /* allowNull */ false));
  }

  final boolean isBetween = (betweenInVariation == BetweenInVariation.FILTER_BETWEEN || betweenInVariation == BetweenInVariation.FILTER_NOT_BETWEEN || betweenInVariation == BetweenInVariation.PROJECTION_BETWEEN || betweenInVariation == BetweenInVariation.PROJECTION_NOT_BETWEEN);

  List<Object> compareList = new ArrayList<Object>();

  // Sorted copy of the pool, used to pick range limits by rank.
  List<Object> sortedList = new ArrayList<Object>(valueCount);
  sortedList.addAll(valueList);
  Object exampleObject = valueList.get(0);
  WritableComparator writableComparator = WritableComparator.get((Class<? extends WritableComparable>) exampleObject.getClass());
  sortedList.sort(writableComparator);

  final boolean isInvert;
  if (isBetween) {
    // FILTER_BETWEEN
    // FILTER_NOT_BETWEEN
    // PROJECTION_BETWEEN
    // PROJECTION_NOT_BETWEEN
    isInvert = (betweenInVariation == BetweenInVariation.FILTER_NOT_BETWEEN || betweenInVariation == BetweenInVariation.PROJECTION_NOT_BETWEEN);
    switch (subVariation) {
      case 0:
        // Range covers all values exactly.
        compareList.add(sortedList.get(0));
        compareList.add(sortedList.get(valueCount - 1));
        break;
      case 1:
        // Exclude the first and last sorted.
        compareList.add(sortedList.get(1));
        compareList.add(sortedList.get(valueCount - 2));
        break;
      case 2:
        // Only last 2 sorted.
        compareList.add(sortedList.get(valueCount - 2));
        compareList.add(sortedList.get(valueCount - 1));
        break;
      case 3:
      case 4:
      case 5:
      case 6:
        {
          // Choose 2 adjacent in the middle.
          Object min = sortedList.get(5);
          Object max = sortedList.get(6);
          compareList.add(min);
          compareList.add(max);
          // Variations 4-6 pass one or both limits to removeValue on the pool before rows
          // are generated (presumably so the limit itself never appears as a column value).
          if (subVariation == 4) {
            removeValue(valueList, min);
          } else if (subVariation == 5) {
            removeValue(valueList, max);
          } else if (subVariation == 6) {
            removeValue(valueList, min);
            removeValue(valueList, max);
          }
        }
        break;
      default:
        return false;
    }
  } else {
    // FILTER_IN.
    // PROJECTION_IN.
    isInvert = false;
    switch (subVariation) {
      case 0:
        // All values.
        compareList.addAll(valueList);
        break;
      case 1:
        // Skip the first and last entries of the (unsorted) value list.
        for (int i = 1; i < valueCount - 1; i++) {
          compareList.add(valueList.get(i));
        }
        break;
      case 2:
        // The even ones.
        // NOTE(review): starts at index 2, so index 0 is not included -- confirm intended.
        for (int i = 2; i < valueCount; i += 2) {
          compareList.add(valueList.get(i));
        }
        break;
      case 3:
        // Choose 2 adjacent in the middle. Unlike BETWEEN there are no IN sub-variations
        // 4-6, so nothing is removed from the value pool here.
        compareList.add(sortedList.get(5));
        compareList.add(sortedList.get(6));
        break;
      default:
        return false;
    }
  }

  // ----------------------------------------------------------------------------------------------
  // Row source with a single column whose values are drawn from valueList.
  GenerationSpec generationSpec = GenerationSpec.createValueList(typeInfo, valueList);
  List<GenerationSpec> generationSpecList = new ArrayList<GenerationSpec>();
  List<DataTypePhysicalVariation> explicitDataTypePhysicalVariationList = new ArrayList<DataTypePhysicalVariation>();
  generationSpecList.add(generationSpec);
  explicitDataTypePhysicalVariationList.add(dataTypePhysicalVariation);

  VectorRandomRowSource rowSource = new VectorRandomRowSource();
  rowSource.initGenerationSpecSchema(random, generationSpecList, /* maxComplexDepth */ 0, /* allowNull */ true, /* isUnicodeOk */ true, explicitDataTypePhysicalVariationList);

  List<String> columns = new ArrayList<String>();
  String col1Name = rowSource.columnNames().get(0);
  columns.add(col1Name);
  final ExprNodeDesc col1Expr = new ExprNodeColumnDesc(typeInfo, col1Name, "table", false);

  // Children: [invert flag (BETWEEN only)], column, then one constant per compare value.
  List<ExprNodeDesc> children = new ArrayList<ExprNodeDesc>();
  if (isBetween) {
    children.add(new ExprNodeConstantDesc(Boolean.valueOf(isInvert)));
  }
  children.add(col1Expr);
  for (Object compareObject : compareList) {
    ExprNodeConstantDesc constDesc = new ExprNodeConstantDesc(typeInfo, VectorRandomRowSource.getNonWritableObject(compareObject, typeInfo, objectInspector));
    children.add(constDesc);
  }

  String[] columnNames = columns.toArray(new String[0]);

  Object[][] randomRows = rowSource.randomRows(100000);
  VectorRandomBatchSource batchSource = VectorRandomBatchSource.createInterestingBatches(random, rowSource, randomRows, null);

  // Initialize the UDF only to learn its output inspector / type.
  final GenericUDF udf;
  final ObjectInspector outputObjectInspector;
  if (isBetween) {
    udf = new GenericUDFBetween();
    // First argument is boolean invert. Arguments 1..3 are inspectors for range limits...
    ObjectInspector[] argumentOIs = new ObjectInspector[4];
    argumentOIs[0] = PrimitiveObjectInspectorFactory.writableBooleanObjectInspector;
    argumentOIs[1] = objectInspector;
    argumentOIs[2] = objectInspector;
    argumentOIs[3] = objectInspector;
    outputObjectInspector = udf.initialize(argumentOIs);
  } else {
    final int compareCount = compareList.size();
    udf = new GenericUDFIn();
    ObjectInspector[] argumentOIs = new ObjectInspector[compareCount];
    // children.get(1) is the first compare constant here (index 0 is the column); its
    // constant inspector is reused for every argument slot.
    ConstantObjectInspector constantObjectInspector = (ConstantObjectInspector) children.get(1).getWritableObjectInspector();
    for (int i = 0; i < compareCount; i++) {
      argumentOIs[i] = constantObjectInspector;
    }
    outputObjectInspector = udf.initialize(argumentOIs);
  }

  TypeInfo outputTypeInfo = TypeInfoUtils.getTypeInfoFromObjectInspector(outputObjectInspector);

  ExprNodeGenericFuncDesc exprDesc = new ExprNodeGenericFuncDesc(TypeInfoFactory.booleanTypeInfo, udf, children);

  return executeTestModesAndVerify(typeInfo, betweenInVariation, compareList, columns, columnNames, children, udf, exprDesc, randomRows, rowSource, batchSource, outputTypeInfo, /* skipAdaptor */ false);
}
use of org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc in project hive by apache.
the class TestVectorBetweenIn method doBetweenStructInVariation.
/**
 * Builds an IN expression whose left side is {@code struct(col...)} over the fields of the
 * given STRUCT type and whose list members are STRUCT constants, then hands it with randomly
 * generated rows to {@code executeTestModesAndVerify} (with the adaptor skipped).
 *
 * @param random source of randomness; the order in which it is consumed is significant for
 *     reproducibility
 * @param structTypeName type string that must parse to a {@code StructTypeInfo}
 * @param betweenInVariation forwarded unchanged to {@code executeTestModesAndVerify}
 * @return the result of {@code executeTestModesAndVerify}
 * @throws Exception propagated from the helpers called here
 */
private boolean doBetweenStructInVariation(Random random, String structTypeName, BetweenInVariation betweenInVariation) throws Exception {
  StructTypeInfo structTypeInfo = (StructTypeInfo) TypeInfoUtils.getTypeInfoFromTypeString(structTypeName);
  ObjectInspector structObjectInspector = TypeInfoUtils.getStandardWritableObjectInspectorFromTypeInfo(structTypeInfo);

  // Pool of 10 to 19 random non-null struct values.
  final int valueCount = 10 + random.nextInt(10);
  List<Object> valueList = new ArrayList<Object>(valueCount);
  for (int i = 0; i < valueCount; i++) {
    valueList.add(VectorRandomRowSource.randomWritable(random, structTypeInfo, structObjectInspector, DataTypePhysicalVariation.NONE, /* allowNull */ false));
  }

  // No convenient WritableComparator / WritableComparable available for STRUCT.
  // So instead of picking by sorted rank, choose distinct pool entries at random. The limit is
  // at most 4 + (valueCount / 2 - 1), which stays below valueCount, so the loop terminates.
  List<Object> compareList = new ArrayList<Object>();
  Set<Integer> includedSet = new HashSet<Integer>();
  final int chooseLimit = 4 + random.nextInt(valueCount / 2);
  int chooseCount = 0;
  while (chooseCount < chooseLimit) {
    final int index = random.nextInt(valueCount);
    if (!includedSet.add(index)) {
      // Already chosen; draw again.
      continue;
    }
    compareList.add(valueList.get(index));
    chooseCount++;
  }

  // ----------------------------------------------------------------------------------------------
  // Row source with a single STRUCT column whose values are drawn from valueList.
  GenerationSpec structGenerationSpec = GenerationSpec.createValueList(structTypeInfo, valueList);
  List<GenerationSpec> structGenerationSpecList = new ArrayList<GenerationSpec>();
  List<DataTypePhysicalVariation> structExplicitDataTypePhysicalVariationList = new ArrayList<DataTypePhysicalVariation>();
  structGenerationSpecList.add(structGenerationSpec);
  structExplicitDataTypePhysicalVariationList.add(DataTypePhysicalVariation.NONE);

  VectorRandomRowSource structRowSource = new VectorRandomRowSource();
  structRowSource.initGenerationSpecSchema(random, structGenerationSpecList, /* maxComplexDepth */ 0, /* allowNull */ true, /* isUnicodeOk */ true, structExplicitDataTypePhysicalVariationList);

  Object[][] structRandomRows = structRowSource.randomRows(100000);

  // ---------------------------------------------------------------------------------------------
  // Second row source: one column per struct field, created with createOmitGeneration; the
  // cells are then overwritten below with the fields of the generated structs.
  List<GenerationSpec> generationSpecList = new ArrayList<GenerationSpec>();
  List<DataTypePhysicalVariation> explicitDataTypePhysicalVariationList = new ArrayList<DataTypePhysicalVariation>();

  List<TypeInfo> fieldTypeInfoList = structTypeInfo.getAllStructFieldTypeInfos();
  final int fieldCount = fieldTypeInfoList.size();
  for (int i = 0; i < fieldCount; i++) {
    GenerationSpec generationSpec = GenerationSpec.createOmitGeneration(fieldTypeInfoList.get(i));
    generationSpecList.add(generationSpec);
    explicitDataTypePhysicalVariationList.add(DataTypePhysicalVariation.NONE);
  }

  VectorRandomRowSource rowSource = new VectorRandomRowSource();
  rowSource.initGenerationSpecSchema(random, generationSpecList, /* maxComplexDepth */ 0, /* allowNull */ true, /* isUnicodeOk */ true, explicitDataTypePhysicalVariationList);

  Object[][] randomRows = rowSource.randomRows(100000);

  // Spread each generated struct across the per-field columns of the same row.
  // NOTE(review): assumes both row arrays have the same length and that column 0 of every
  // struct row is a non-null List -- confirm against VectorRandomRowSource.
  final int rowCount = randomRows.length;
  for (int r = 0; r < rowCount; r++) {
    @SuppressWarnings("unchecked")
    List<Object> fieldValueList = (List<Object>) structRandomRows[r][0];
    for (int f = 0; f < fieldCount; f++) {
      randomRows[r][f] = fieldValueList.get(f);
    }
  }

  // ---------------------------------------------------------------------------------------------
  // Currently, STRUCT IN vectorization assumes a GenericUDFStruct.
  List<ObjectInspector> structUdfObjectInspectorList = new ArrayList<ObjectInspector>();
  List<ExprNodeDesc> structUdfChildren = new ArrayList<ExprNodeDesc>(fieldCount);
  List<String> rowColumnNameList = rowSource.columnNames();
  for (int i = 0; i < fieldCount; i++) {
    TypeInfo fieldTypeInfo = fieldTypeInfoList.get(i);
    ExprNodeColumnDesc fieldExpr = new ExprNodeColumnDesc(fieldTypeInfo, rowColumnNameList.get(i), "table", false);
    structUdfChildren.add(fieldExpr);
    ObjectInspector fieldObjectInspector = VectorRandomRowSource.getObjectInspector(fieldTypeInfo, DataTypePhysicalVariation.NONE);
    structUdfObjectInspectorList.add(fieldObjectInspector);
  }
  StandardStructObjectInspector structUdfObjectInspector = ObjectInspectorFactory.getStandardStructObjectInspector(rowColumnNameList, structUdfObjectInspectorList);
  String structUdfTypeName = structUdfObjectInspector.getTypeName();
  TypeInfo structUdfTypeInfo = TypeInfoUtils.getTypeInfoFromTypeString(structUdfTypeName);

  // Left side of the IN: struct(col0, col1, ...), resolved through the function registry.
  String structFuncText = "struct";
  FunctionInfo fi = FunctionRegistry.getFunctionInfo(structFuncText);
  GenericUDF genericUDF = fi.getGenericUDF();
  ExprNodeDesc col1Expr = new ExprNodeGenericFuncDesc(structUdfObjectInspector, genericUDF, structFuncText, structUdfChildren);

  // ---------------------------------------------------------------------------------------------
  List<String> columns = new ArrayList<String>();

  // Children: the struct(...) expression followed by one STRUCT constant per compare value.
  List<ExprNodeDesc> children = new ArrayList<ExprNodeDesc>();
  children.add(col1Expr);
  for (Object compareObject : compareList) {
    ExprNodeConstantDesc constDesc = new ExprNodeConstantDesc(structUdfTypeInfo, VectorRandomRowSource.getNonWritableObject(compareObject, structUdfTypeInfo, structUdfObjectInspector));
    children.add(constDesc);
  }

  for (int i = 0; i < fieldCount; i++) {
    columns.add(rowColumnNameList.get(i));
  }

  String[] columnNames = columns.toArray(new String[0]);

  VectorRandomBatchSource batchSource = VectorRandomBatchSource.createInterestingBatches(random, rowSource, randomRows, null);

  // ---------------------------------------------------------------------------------------------
  // Initialize the IN UDF only to learn its output inspector / type.
  final GenericUDF udf = new GenericUDFIn();
  final int compareCount = compareList.size();
  ObjectInspector[] argumentOIs = new ObjectInspector[compareCount];
  for (int i = 0; i < compareCount; i++) {
    argumentOIs[i] = structUdfObjectInspector;
  }
  final ObjectInspector outputObjectInspector = udf.initialize(argumentOIs);

  TypeInfo outputTypeInfo = TypeInfoUtils.getTypeInfoFromObjectInspector(outputObjectInspector);

  ExprNodeGenericFuncDesc exprDesc = new ExprNodeGenericFuncDesc(TypeInfoFactory.booleanTypeInfo, udf, children);

  return executeTestModesAndVerify(structUdfTypeInfo, betweenInVariation, compareList, columns, columnNames, children, udf, exprDesc, randomRows, rowSource, batchSource, outputTypeInfo, /* skipAdaptor */ true);
}
Aggregations