use of org.apache.hadoop.hive.ql.exec.vector.VectorRandomBatchSource in project hive by apache.
the class AbstractMapJoin method setupMapJoin.
protected void setupMapJoin(HiveConf hiveConf, long seed, int rowCount, VectorMapJoinVariation vectorMapJoinVariation, MapJoinTestImplementation mapJoinImplementation, String[] bigTableColumnNames, TypeInfo[] bigTableTypeInfos, int[] bigTableKeyColumnNums, String[] smallTableValueColumnNames, TypeInfo[] smallTableValueTypeInfos, int[] bigTableRetainColumnNums, int[] smallTableRetainKeyColumnNums, int[] smallTableRetainValueColumnNums, SmallTableGenerationParameters smallTableGenerationParameters) throws Exception {
this.vectorMapJoinVariation = vectorMapJoinVariation;
this.mapJoinImplementation = mapJoinImplementation;
testDesc = new MapJoinTestDescription(hiveConf, vectorMapJoinVariation, bigTableTypeInfos, bigTableKeyColumnNums, smallTableValueTypeInfos, smallTableRetainKeyColumnNums, smallTableGenerationParameters, MapJoinPlanVariation.DYNAMIC_PARTITION_HASH_JOIN);
// Prepare data. Good for ANY implementation variation.
testData = new MapJoinTestData(rowCount, testDesc, seed);
ObjectRegistryImpl objectRegistry = new ObjectRegistryImpl();
ObjectCache.setupObjectRegistry(objectRegistry);
operator = setupBenchmarkImplementation(mapJoinImplementation, testDesc, testData);
isVectorOutput = isVectorOutput(mapJoinImplementation);
/*
* We don't measure data generation execution cost -- generate the big table into memory first.
*/
if (!isVectorOutput) {
bigTableRows = testData.getBigTableBatchSource().getRandomRows();
} else {
ArrayList<VectorizedRowBatch> bigTableBatchList = new ArrayList<VectorizedRowBatch>();
VectorRandomBatchSource batchSource = testData.getBigTableBatchSource();
batchSource.resetBatchIteration();
while (true) {
VectorizedRowBatch batch = testData.createBigTableBatch(testDesc);
if (!batchSource.fillNextBatch(batch)) {
break;
}
bigTableBatchList.add(batch);
}
bigTableBatches = bigTableBatchList.toArray(new VectorizedRowBatch[0]);
}
}
use of org.apache.hadoop.hive.ql.exec.vector.VectorRandomBatchSource in project hive by apache.
the class TestVectorBetweenIn method doBetweenInVariation.
private boolean doBetweenInVariation(Random random, String typeName, boolean tryDecimal64, BetweenInVariation betweenInVariation, int subVariation) throws Exception {
TypeInfo typeInfo = TypeInfoUtils.getTypeInfoFromTypeString(typeName);
boolean isDecimal64 = checkDecimal64(tryDecimal64, typeInfo);
DataTypePhysicalVariation dataTypePhysicalVariation = (isDecimal64 ? DataTypePhysicalVariation.DECIMAL_64 : DataTypePhysicalVariation.NONE);
final int decimal64Scale = (isDecimal64 ? ((DecimalTypeInfo) typeInfo).getScale() : 0);
// ----------------------------------------------------------------------------------------------
ObjectInspector objectInspector = TypeInfoUtils.getStandardWritableObjectInspectorFromTypeInfo(typeInfo);
final int valueCount = 10 + random.nextInt(10);
List<Object> valueList = new ArrayList<Object>(valueCount);
for (int i = 0; i < valueCount; i++) {
valueList.add(VectorRandomRowSource.randomWritable(random, typeInfo, objectInspector, dataTypePhysicalVariation, /* allowNull */
false));
}
final boolean isBetween = (betweenInVariation == BetweenInVariation.FILTER_BETWEEN || betweenInVariation == BetweenInVariation.FILTER_NOT_BETWEEN || betweenInVariation == BetweenInVariation.PROJECTION_BETWEEN || betweenInVariation == BetweenInVariation.PROJECTION_NOT_BETWEEN);
List<Object> compareList = new ArrayList<Object>();
List<Object> sortedList = new ArrayList<Object>(valueCount);
sortedList.addAll(valueList);
Object exampleObject = valueList.get(0);
WritableComparator writableComparator = WritableComparator.get((Class<? extends WritableComparable>) exampleObject.getClass());
sortedList.sort(writableComparator);
final boolean isInvert;
if (isBetween) {
// FILTER_BETWEEN
// FILTER_NOT_BETWEEN
// PROJECTION_BETWEEN
// PROJECTION_NOT_BETWEEN
isInvert = (betweenInVariation == BetweenInVariation.FILTER_NOT_BETWEEN || betweenInVariation == BetweenInVariation.PROJECTION_NOT_BETWEEN);
switch(subVariation) {
case 0:
// Range covers all values exactly.
compareList.add(sortedList.get(0));
compareList.add(sortedList.get(valueCount - 1));
break;
case 1:
// Exclude the first and last sorted.
compareList.add(sortedList.get(1));
compareList.add(sortedList.get(valueCount - 2));
break;
case 2:
// Only last 2 sorted.
compareList.add(sortedList.get(valueCount - 2));
compareList.add(sortedList.get(valueCount - 1));
break;
case 3:
case 4:
case 5:
case 6:
{
// Choose 2 adjacent in the middle.
Object min = sortedList.get(5);
Object max = sortedList.get(6);
compareList.add(min);
compareList.add(max);
if (subVariation == 4) {
removeValue(valueList, min);
} else if (subVariation == 5) {
removeValue(valueList, max);
} else if (subVariation == 6) {
removeValue(valueList, min);
removeValue(valueList, max);
}
}
break;
default:
return false;
}
} else {
// FILTER_IN.
// PROJECTION_IN.
isInvert = false;
switch(subVariation) {
case 0:
// All values.
compareList.addAll(valueList);
break;
case 1:
// Don't include the first and last sorted.
for (int i = 1; i < valueCount - 1; i++) {
compareList.add(valueList.get(i));
}
break;
case 2:
// The even ones.
for (int i = 2; i < valueCount; i += 2) {
compareList.add(valueList.get(i));
}
break;
case 3:
{
// Choose 2 adjacent in the middle.
Object min = sortedList.get(5);
Object max = sortedList.get(6);
compareList.add(min);
compareList.add(max);
if (subVariation == 4) {
removeValue(valueList, min);
} else if (subVariation == 5) {
removeValue(valueList, max);
} else if (subVariation == 6) {
removeValue(valueList, min);
removeValue(valueList, max);
}
}
break;
default:
return false;
}
}
// ----------------------------------------------------------------------------------------------
GenerationSpec generationSpec = GenerationSpec.createValueList(typeInfo, valueList);
List<GenerationSpec> generationSpecList = new ArrayList<GenerationSpec>();
List<DataTypePhysicalVariation> explicitDataTypePhysicalVariationList = new ArrayList<DataTypePhysicalVariation>();
generationSpecList.add(generationSpec);
explicitDataTypePhysicalVariationList.add(dataTypePhysicalVariation);
VectorRandomRowSource rowSource = new VectorRandomRowSource();
rowSource.initGenerationSpecSchema(random, generationSpecList, /* maxComplexDepth */
0, /* allowNull */
true, /* isUnicodeOk */
true, explicitDataTypePhysicalVariationList);
List<String> columns = new ArrayList<String>();
String col1Name = rowSource.columnNames().get(0);
columns.add(col1Name);
final ExprNodeDesc col1Expr = new ExprNodeColumnDesc(typeInfo, col1Name, "table", false);
List<ExprNodeDesc> children = new ArrayList<ExprNodeDesc>();
if (isBetween) {
children.add(new ExprNodeConstantDesc(Boolean.valueOf(isInvert)));
}
children.add(col1Expr);
for (Object compareObject : compareList) {
ExprNodeConstantDesc constDesc = new ExprNodeConstantDesc(typeInfo, VectorRandomRowSource.getNonWritableObject(compareObject, typeInfo, objectInspector));
children.add(constDesc);
}
String[] columnNames = columns.toArray(new String[0]);
Object[][] randomRows = rowSource.randomRows(100000);
VectorRandomBatchSource batchSource = VectorRandomBatchSource.createInterestingBatches(random, rowSource, randomRows, null);
final GenericUDF udf;
final ObjectInspector outputObjectInspector;
if (isBetween) {
udf = new GenericUDFBetween();
// First argument is boolean invert. Arguments 1..3 are inspectors for range limits...
ObjectInspector[] argumentOIs = new ObjectInspector[4];
argumentOIs[0] = PrimitiveObjectInspectorFactory.writableBooleanObjectInspector;
argumentOIs[1] = objectInspector;
argumentOIs[2] = objectInspector;
argumentOIs[3] = objectInspector;
outputObjectInspector = udf.initialize(argumentOIs);
} else {
final int compareCount = compareList.size();
udf = new GenericUDFIn();
ObjectInspector[] argumentOIs = new ObjectInspector[compareCount];
ConstantObjectInspector constantObjectInspector = (ConstantObjectInspector) children.get(1).getWritableObjectInspector();
for (int i = 0; i < compareCount; i++) {
argumentOIs[i] = constantObjectInspector;
}
outputObjectInspector = udf.initialize(argumentOIs);
}
TypeInfo outputTypeInfo = TypeInfoUtils.getTypeInfoFromObjectInspector(outputObjectInspector);
ExprNodeGenericFuncDesc exprDesc = new ExprNodeGenericFuncDesc(TypeInfoFactory.booleanTypeInfo, udf, children);
return executeTestModesAndVerify(typeInfo, betweenInVariation, compareList, columns, columnNames, children, udf, exprDesc, randomRows, rowSource, batchSource, outputTypeInfo, /* skipAdaptor */
false);
}
use of org.apache.hadoop.hive.ql.exec.vector.VectorRandomBatchSource in project hive by apache.
the class TestVectorBetweenIn method doBetweenStructInVariation.
private boolean doBetweenStructInVariation(Random random, String structTypeName, BetweenInVariation betweenInVariation) throws Exception {
StructTypeInfo structTypeInfo = (StructTypeInfo) TypeInfoUtils.getTypeInfoFromTypeString(structTypeName);
ObjectInspector structObjectInspector = TypeInfoUtils.getStandardWritableObjectInspectorFromTypeInfo(structTypeInfo);
final int valueCount = 10 + random.nextInt(10);
List<Object> valueList = new ArrayList<Object>(valueCount);
for (int i = 0; i < valueCount; i++) {
valueList.add(VectorRandomRowSource.randomWritable(random, structTypeInfo, structObjectInspector, DataTypePhysicalVariation.NONE, /* allowNull */
false));
}
final boolean isInvert = false;
// No convenient WritableComparator / WritableComparable available for STRUCT.
List<Object> compareList = new ArrayList<Object>();
Set<Integer> includedSet = new HashSet<Integer>();
final int chooseLimit = 4 + random.nextInt(valueCount / 2);
int chooseCount = 0;
while (chooseCount < chooseLimit) {
final int index = random.nextInt(valueCount);
if (includedSet.contains(index)) {
continue;
}
includedSet.add(index);
compareList.add(valueList.get(index));
chooseCount++;
}
// ----------------------------------------------------------------------------------------------
GenerationSpec structGenerationSpec = GenerationSpec.createValueList(structTypeInfo, valueList);
List<GenerationSpec> structGenerationSpecList = new ArrayList<GenerationSpec>();
List<DataTypePhysicalVariation> structExplicitDataTypePhysicalVariationList = new ArrayList<DataTypePhysicalVariation>();
structGenerationSpecList.add(structGenerationSpec);
structExplicitDataTypePhysicalVariationList.add(DataTypePhysicalVariation.NONE);
VectorRandomRowSource structRowSource = new VectorRandomRowSource();
structRowSource.initGenerationSpecSchema(random, structGenerationSpecList, /* maxComplexDepth */
0, /* allowNull */
true, /* isUnicodeOk */
true, structExplicitDataTypePhysicalVariationList);
Object[][] structRandomRows = structRowSource.randomRows(100000);
// ---------------------------------------------------------------------------------------------
List<GenerationSpec> generationSpecList = new ArrayList<GenerationSpec>();
List<DataTypePhysicalVariation> explicitDataTypePhysicalVariationList = new ArrayList<DataTypePhysicalVariation>();
List<TypeInfo> fieldTypeInfoList = structTypeInfo.getAllStructFieldTypeInfos();
final int fieldCount = fieldTypeInfoList.size();
for (int i = 0; i < fieldCount; i++) {
GenerationSpec generationSpec = GenerationSpec.createOmitGeneration(fieldTypeInfoList.get(i));
generationSpecList.add(generationSpec);
explicitDataTypePhysicalVariationList.add(DataTypePhysicalVariation.NONE);
}
VectorRandomRowSource rowSource = new VectorRandomRowSource();
rowSource.initGenerationSpecSchema(random, generationSpecList, /* maxComplexDepth */
0, /* allowNull */
true, /* isUnicodeOk */
true, explicitDataTypePhysicalVariationList);
Object[][] randomRows = rowSource.randomRows(100000);
final int rowCount = randomRows.length;
for (int r = 0; r < rowCount; r++) {
List<Object> fieldValueList = (ArrayList) structRandomRows[r][0];
for (int f = 0; f < fieldCount; f++) {
randomRows[r][f] = fieldValueList.get(f);
}
}
// ---------------------------------------------------------------------------------------------
// Currently, STRUCT IN vectorization assumes a GenericUDFStruct.
List<ObjectInspector> structUdfObjectInspectorList = new ArrayList<ObjectInspector>();
List<ExprNodeDesc> structUdfChildren = new ArrayList<ExprNodeDesc>(fieldCount);
List<String> rowColumnNameList = rowSource.columnNames();
for (int i = 0; i < fieldCount; i++) {
TypeInfo fieldTypeInfo = fieldTypeInfoList.get(i);
ExprNodeColumnDesc fieldExpr = new ExprNodeColumnDesc(fieldTypeInfo, rowColumnNameList.get(i), "table", false);
structUdfChildren.add(fieldExpr);
ObjectInspector fieldObjectInspector = VectorRandomRowSource.getObjectInspector(fieldTypeInfo, DataTypePhysicalVariation.NONE);
structUdfObjectInspectorList.add(fieldObjectInspector);
}
StandardStructObjectInspector structUdfObjectInspector = ObjectInspectorFactory.getStandardStructObjectInspector(rowColumnNameList, structUdfObjectInspectorList);
String structUdfTypeName = structUdfObjectInspector.getTypeName();
TypeInfo structUdfTypeInfo = TypeInfoUtils.getTypeInfoFromTypeString(structUdfTypeName);
String structFuncText = "struct";
FunctionInfo fi = FunctionRegistry.getFunctionInfo(structFuncText);
GenericUDF genericUDF = fi.getGenericUDF();
ExprNodeDesc col1Expr = new ExprNodeGenericFuncDesc(structUdfObjectInspector, genericUDF, structFuncText, structUdfChildren);
// ---------------------------------------------------------------------------------------------
List<String> columns = new ArrayList<String>();
List<ExprNodeDesc> children = new ArrayList<ExprNodeDesc>();
children.add(col1Expr);
for (int i = 0; i < compareList.size(); i++) {
Object compareObject = compareList.get(i);
ExprNodeConstantDesc constDesc = new ExprNodeConstantDesc(structUdfTypeInfo, VectorRandomRowSource.getNonWritableObject(compareObject, structUdfTypeInfo, structUdfObjectInspector));
children.add(constDesc);
}
for (int i = 0; i < fieldCount; i++) {
columns.add(rowColumnNameList.get(i));
}
String[] columnNames = columns.toArray(new String[0]);
VectorRandomBatchSource batchSource = VectorRandomBatchSource.createInterestingBatches(random, rowSource, randomRows, null);
// ---------------------------------------------------------------------------------------------
final GenericUDF udf = new GenericUDFIn();
final int compareCount = compareList.size();
ObjectInspector[] argumentOIs = new ObjectInspector[compareCount];
for (int i = 0; i < compareCount; i++) {
argumentOIs[i] = structUdfObjectInspector;
}
final ObjectInspector outputObjectInspector = udf.initialize(argumentOIs);
TypeInfo outputTypeInfo = TypeInfoUtils.getTypeInfoFromObjectInspector(outputObjectInspector);
ExprNodeGenericFuncDesc exprDesc = new ExprNodeGenericFuncDesc(TypeInfoFactory.booleanTypeInfo, udf, children);
return executeTestModesAndVerify(structUdfTypeInfo, betweenInVariation, compareList, columns, columnNames, children, udf, exprDesc, randomRows, rowSource, batchSource, outputTypeInfo, /* skipAdaptor */
true);
}
use of org.apache.hadoop.hive.ql.exec.vector.VectorRandomBatchSource in project hive by apache.
the class TestVectorCastStatement method doIfTestOneCast.
private void doIfTestOneCast(Random random, String typeName, DataTypePhysicalVariation dataTypePhysicalVariation, PrimitiveCategory targetPrimitiveCategory) throws Exception {
TypeInfo typeInfo = TypeInfoUtils.getTypeInfoFromTypeString(typeName);
PrimitiveCategory primitiveCategory = ((PrimitiveTypeInfo) typeInfo).getPrimitiveCategory();
boolean isDecimal64 = (dataTypePhysicalVariation == DataTypePhysicalVariation.DECIMAL_64);
final int decimal64Scale = (isDecimal64 ? ((DecimalTypeInfo) typeInfo).getScale() : 0);
// ----------------------------------------------------------------------------------------------
String targetTypeName;
if (targetPrimitiveCategory == PrimitiveCategory.BYTE) {
targetTypeName = "tinyint";
} else if (targetPrimitiveCategory == PrimitiveCategory.SHORT) {
targetTypeName = "smallint";
} else if (targetPrimitiveCategory == PrimitiveCategory.LONG) {
targetTypeName = "bigint";
} else {
targetTypeName = targetPrimitiveCategory.name().toLowerCase();
}
targetTypeName = VectorRandomRowSource.getDecoratedTypeName(random, targetTypeName);
TypeInfo targetTypeInfo = TypeInfoUtils.getTypeInfoFromTypeString(targetTypeName);
// ----------------------------------------------------------------------------------------------
GenerationSpec generationSpec;
if (needsValidDataTypeData(targetTypeInfo) && (primitiveCategory == PrimitiveCategory.STRING || primitiveCategory == PrimitiveCategory.CHAR || primitiveCategory == PrimitiveCategory.VARCHAR)) {
generationSpec = GenerationSpec.createStringFamilyOtherTypeValue(typeInfo, targetTypeInfo);
} else {
generationSpec = GenerationSpec.createSameType(typeInfo);
}
List<GenerationSpec> generationSpecList = new ArrayList<GenerationSpec>();
List<DataTypePhysicalVariation> explicitDataTypePhysicalVariationList = new ArrayList<DataTypePhysicalVariation>();
generationSpecList.add(generationSpec);
explicitDataTypePhysicalVariationList.add(dataTypePhysicalVariation);
VectorRandomRowSource rowSource = new VectorRandomRowSource();
rowSource.initGenerationSpecSchema(random, generationSpecList, /* maxComplexDepth */
0, /* allowNull */
true, /* isUnicodeOk */
true, explicitDataTypePhysicalVariationList);
List<String> columns = new ArrayList<String>();
columns.add("col1");
ExprNodeColumnDesc col1Expr = new ExprNodeColumnDesc(typeInfo, "col1", "table", false);
List<ExprNodeDesc> children = new ArrayList<ExprNodeDesc>();
children.add(col1Expr);
String[] columnNames = columns.toArray(new String[0]);
Object[][] randomRows = rowSource.randomRows(100000);
VectorRandomBatchSource batchSource = VectorRandomBatchSource.createInterestingBatches(random, rowSource, randomRows, null);
final int rowCount = randomRows.length;
Object[][] resultObjectsArray = new Object[CastStmtTestMode.count][];
for (int i = 0; i < CastStmtTestMode.count; i++) {
Object[] resultObjects = new Object[rowCount];
resultObjectsArray[i] = resultObjects;
CastStmtTestMode ifStmtTestMode = CastStmtTestMode.values()[i];
switch(ifStmtTestMode) {
case ROW_MODE:
if (!doRowCastTest(typeInfo, targetTypeInfo, columns, children, randomRows, rowSource.rowStructObjectInspector(), resultObjects)) {
return;
}
break;
case ADAPTOR:
case VECTOR_EXPRESSION:
if (!doVectorCastTest(typeInfo, targetTypeInfo, columns, columnNames, rowSource.typeInfos(), rowSource.dataTypePhysicalVariations(), children, ifStmtTestMode, batchSource, resultObjects)) {
return;
}
break;
default:
throw new RuntimeException("Unexpected IF statement test mode " + ifStmtTestMode);
}
}
for (int i = 0; i < rowCount; i++) {
// Row-mode is the expected value.
Object expectedResult = resultObjectsArray[0][i];
for (int v = 1; v < CastStmtTestMode.count; v++) {
Object vectorResult = resultObjectsArray[v][i];
if (expectedResult == null || vectorResult == null) {
if (expectedResult != null || vectorResult != null) {
Assert.fail("Row " + i + " sourceTypeName " + typeName + " targetTypeName " + targetTypeName + " " + CastStmtTestMode.values()[v] + " result is NULL " + (vectorResult == null ? "YES" : "NO result " + vectorResult.toString()) + " does not match row-mode expected result is NULL " + (expectedResult == null ? "YES" : "NO result " + expectedResult.toString()) + " row values " + Arrays.toString(randomRows[i]));
}
} else {
if (isDecimal64 && expectedResult instanceof LongWritable) {
HiveDecimalWritable expectedHiveDecimalWritable = new HiveDecimalWritable(0);
expectedHiveDecimalWritable.deserialize64(((LongWritable) expectedResult).get(), decimal64Scale);
expectedResult = expectedHiveDecimalWritable;
}
if (!expectedResult.equals(vectorResult)) {
Assert.fail("Row " + i + " sourceTypeName " + typeName + " targetTypeName " + targetTypeName + " " + CastStmtTestMode.values()[v] + " result " + vectorResult.toString() + " (" + vectorResult.getClass().getSimpleName() + ")" + " does not match row-mode expected result " + expectedResult.toString() + " (" + expectedResult.getClass().getSimpleName() + ")" + " row values " + Arrays.toString(randomRows[i]));
}
}
}
}
}
use of org.apache.hadoop.hive.ql.exec.vector.VectorRandomBatchSource in project hive by apache.
the class TestVectorAggregation method doTests.
private void doTests(Random random, String aggregationName, TypeInfo typeInfo, boolean isCountStar, boolean tryDecimal64) throws Exception {
List<GenerationSpec> dataAggrGenerationSpecList = new ArrayList<GenerationSpec>();
List<DataTypePhysicalVariation> explicitDataTypePhysicalVariationList = new ArrayList<DataTypePhysicalVariation>();
TypeInfo keyTypeInfo = TypeInfoFactory.shortTypeInfo;
GenerationSpec keyGenerationSpec = GenerationSpec.createOmitGeneration(keyTypeInfo);
dataAggrGenerationSpecList.add(keyGenerationSpec);
explicitDataTypePhysicalVariationList.add(DataTypePhysicalVariation.NONE);
final boolean decimal64Enable = checkDecimal64(tryDecimal64, typeInfo);
GenerationSpec generationSpec = GenerationSpec.createSameType(typeInfo);
dataAggrGenerationSpecList.add(generationSpec);
explicitDataTypePhysicalVariationList.add(decimal64Enable ? DataTypePhysicalVariation.DECIMAL_64 : DataTypePhysicalVariation.NONE);
List<String> columns = new ArrayList<String>();
columns.add("col0");
columns.add("col1");
ExprNodeColumnDesc dataAggrCol1Expr = new ExprNodeColumnDesc(typeInfo, "col1", "table", false);
List<ExprNodeDesc> dataAggrParameters = new ArrayList<ExprNodeDesc>();
if (!isCountStar) {
dataAggrParameters.add(dataAggrCol1Expr);
}
final int dataAggrParameterCount = dataAggrParameters.size();
ObjectInspector[] dataAggrParameterObjectInspectors = new ObjectInspector[dataAggrParameterCount];
for (int i = 0; i < dataAggrParameterCount; i++) {
TypeInfo paramTypeInfo = dataAggrParameters.get(i).getTypeInfo();
dataAggrParameterObjectInspectors[i] = TypeInfoUtils.getStandardWritableObjectInspectorFromTypeInfo(paramTypeInfo);
}
String[] columnNames = columns.toArray(new String[0]);
final int dataAggrMaxKeyCount = 20000;
final int reductionFactor = 16;
ObjectInspector keyObjectInspector = VectorRandomRowSource.getObjectInspector(keyTypeInfo);
/*
* PARTIAL1.
*/
VectorRandomRowSource partial1RowSource = new VectorRandomRowSource();
boolean allowNull = !aggregationName.equals("bloom_filter");
partial1RowSource.initGenerationSpecSchema(random, dataAggrGenerationSpecList, /* maxComplexDepth */
0, allowNull, /* isUnicodeOk */
true, explicitDataTypePhysicalVariationList);
Object[][] partial1RandomRows = partial1RowSource.randomRows(TEST_ROW_COUNT);
final int partial1RowCount = partial1RandomRows.length;
for (int i = 0; i < partial1RowCount; i++) {
final short shortKey = (short) getLinearRandomNumber(random, dataAggrMaxKeyCount);
partial1RandomRows[i][0] = ((WritableShortObjectInspector) keyObjectInspector).create((short) shortKey);
}
VectorRandomBatchSource partial1BatchSource = VectorRandomBatchSource.createInterestingBatches(random, partial1RowSource, partial1RandomRows, null);
GenericUDAFEvaluator partial1Evaluator = getEvaluator(aggregationName, typeInfo);
if (isCountStar) {
Assert.assertTrue(partial1Evaluator instanceof GenericUDAFCountEvaluator);
GenericUDAFCountEvaluator countEvaluator = (GenericUDAFCountEvaluator) partial1Evaluator;
countEvaluator.setCountAllColumns(true);
}
/*
System.out.println(
"*DEBUG* GenericUDAFEvaluator for " + aggregationName + ", " + typeInfo.getTypeName() + ": " +
partial1Evaluator.getClass().getSimpleName());
*/
// The only way to get the return object inspector (and its return type) is to
// initialize it...
final GenericUDAFEvaluator.Mode partial1UdafEvaluatorMode = GenericUDAFEvaluator.Mode.PARTIAL1;
ObjectInspector partial1ReturnOI = partial1Evaluator.init(partial1UdafEvaluatorMode, dataAggrParameterObjectInspectors);
TypeInfo partial1OutputTypeInfo = TypeInfoUtils.getTypeInfoFromObjectInspector(partial1ReturnOI);
Object[] partial1ResultsArray = new Object[AggregationTestMode.count];
executeAggregationTests(aggregationName, typeInfo, partial1Evaluator, partial1OutputTypeInfo, partial1UdafEvaluatorMode, dataAggrMaxKeyCount, columns, columnNames, dataAggrParameters, partial1RandomRows, partial1RowSource, partial1BatchSource, tryDecimal64, partial1ResultsArray);
verifyAggregationResults(typeInfo, partial1OutputTypeInfo, dataAggrMaxKeyCount, partial1UdafEvaluatorMode, partial1ResultsArray);
final boolean hasDifferentCompleteExpr;
if (varianceNames.contains(aggregationName)) {
hasDifferentCompleteExpr = true;
} else {
switch(aggregationName) {
case "avg":
hasDifferentCompleteExpr = true;
break;
case "bloom_filter":
case "count":
case "max":
case "min":
case "sum":
hasDifferentCompleteExpr = false;
break;
default:
throw new RuntimeException("Unexpected aggregation name " + aggregationName);
}
}
if (hasDifferentCompleteExpr) {
/*
* COMPLETE.
*/
VectorRandomRowSource completeRowSource = new VectorRandomRowSource();
completeRowSource.initGenerationSpecSchema(random, dataAggrGenerationSpecList, /* maxComplexDepth */
0, /* allowNull */
true, /* isUnicodeOk */
true, explicitDataTypePhysicalVariationList);
Object[][] completeRandomRows = completeRowSource.randomRows(TEST_ROW_COUNT);
final int completeRowCount = completeRandomRows.length;
for (int i = 0; i < completeRowCount; i++) {
final short shortKey = (short) getLinearRandomNumber(random, dataAggrMaxKeyCount);
completeRandomRows[i][0] = ((WritableShortObjectInspector) keyObjectInspector).create((short) shortKey);
}
VectorRandomBatchSource completeBatchSource = VectorRandomBatchSource.createInterestingBatches(random, completeRowSource, completeRandomRows, null);
GenericUDAFEvaluator completeEvaluator = getEvaluator(aggregationName, typeInfo);
/*
System.out.println(
"*DEBUG* GenericUDAFEvaluator for " + aggregationName + ", " + typeInfo.getTypeName() + ": " +
completeEvaluator.getClass().getSimpleName());
*/
// The only way to get the return object inspector (and its return type) is to
// initialize it...
final GenericUDAFEvaluator.Mode completeUdafEvaluatorMode = GenericUDAFEvaluator.Mode.COMPLETE;
ObjectInspector completeReturnOI = completeEvaluator.init(completeUdafEvaluatorMode, dataAggrParameterObjectInspectors);
TypeInfo completeOutputTypeInfo = TypeInfoUtils.getTypeInfoFromObjectInspector(completeReturnOI);
Object[] completeResultsArray = new Object[AggregationTestMode.count];
executeAggregationTests(aggregationName, typeInfo, completeEvaluator, completeOutputTypeInfo, completeUdafEvaluatorMode, dataAggrMaxKeyCount, columns, columnNames, dataAggrParameters, completeRandomRows, completeRowSource, completeBatchSource, tryDecimal64, completeResultsArray);
verifyAggregationResults(typeInfo, completeOutputTypeInfo, dataAggrMaxKeyCount, completeUdafEvaluatorMode, completeResultsArray);
}
final boolean hasDifferentPartial2Expr;
if (varianceNames.contains(aggregationName)) {
hasDifferentPartial2Expr = true;
} else {
switch(aggregationName) {
case "avg":
hasDifferentPartial2Expr = true;
break;
case "bloom_filter":
case "count":
case "max":
case "min":
case "sum":
hasDifferentPartial2Expr = false;
break;
default:
throw new RuntimeException("Unexpected aggregation name " + aggregationName);
}
}
if (hasDifferentPartial2Expr) {
/*
* PARTIAL2.
*/
final GenericUDAFEvaluator.Mode mergeUdafEvaluatorMode = GenericUDAFEvaluator.Mode.PARTIAL2;
doMerge(mergeUdafEvaluatorMode, random, aggregationName, typeInfo, keyGenerationSpec, columns, columnNames, dataAggrMaxKeyCount, reductionFactor, partial1OutputTypeInfo, partial1ResultsArray);
}
final boolean hasDifferentFinalExpr;
if (varianceNames.contains(aggregationName)) {
hasDifferentFinalExpr = true;
} else {
switch(aggregationName) {
case "avg":
hasDifferentFinalExpr = true;
break;
case "bloom_filter":
case "count":
hasDifferentFinalExpr = true;
break;
case "max":
case "min":
case "sum":
hasDifferentFinalExpr = false;
break;
default:
throw new RuntimeException("Unexpected aggregation name " + aggregationName);
}
}
if (hasDifferentFinalExpr) {
/*
* FINAL.
*/
final GenericUDAFEvaluator.Mode mergeUdafEvaluatorMode = GenericUDAFEvaluator.Mode.FINAL;
doMerge(mergeUdafEvaluatorMode, random, aggregationName, typeInfo, keyGenerationSpec, columns, columnNames, dataAggrMaxKeyCount, reductionFactor, partial1OutputTypeInfo, partial1ResultsArray);
}
}
Aggregations