Use of org.apache.hadoop.hive.ql.udf.generic.GenericUDAFCount.GenericUDAFCountEvaluator in project hive by apache.
From the class TestVectorAggregation, the method doTests:
private void doTests(Random random, String aggregationName, TypeInfo typeInfo, boolean isCountStar, boolean tryDecimal64) throws Exception {
  List<GenerationSpec> dataAggrGenerationSpecList = new ArrayList<GenerationSpec>();
  List<DataTypePhysicalVariation> explicitDataTypePhysicalVariationList = new ArrayList<DataTypePhysicalVariation>();
  TypeInfo keyTypeInfo = TypeInfoFactory.shortTypeInfo;
  GenerationSpec keyGenerationSpec = GenerationSpec.createOmitGeneration(keyTypeInfo);
  dataAggrGenerationSpecList.add(keyGenerationSpec);
  explicitDataTypePhysicalVariationList.add(DataTypePhysicalVariation.NONE);
  final boolean decimal64Enable = checkDecimal64(tryDecimal64, typeInfo);
  GenerationSpec generationSpec = GenerationSpec.createSameType(typeInfo);
  dataAggrGenerationSpecList.add(generationSpec);
  explicitDataTypePhysicalVariationList.add(decimal64Enable ? DataTypePhysicalVariation.DECIMAL_64 : DataTypePhysicalVariation.NONE);
  List<String> columns = new ArrayList<String>();
  columns.add("col0");
  columns.add("col1");
  ExprNodeColumnDesc dataAggrCol1Expr = new ExprNodeColumnDesc(typeInfo, "col1", "table", false);
  List<ExprNodeDesc> dataAggrParameters = new ArrayList<ExprNodeDesc>();
  if (!isCountStar) {
    dataAggrParameters.add(dataAggrCol1Expr);
  }
  final int dataAggrParameterCount = dataAggrParameters.size();
  ObjectInspector[] dataAggrParameterObjectInspectors = new ObjectInspector[dataAggrParameterCount];
  for (int i = 0; i < dataAggrParameterCount; i++) {
    TypeInfo paramTypeInfo = dataAggrParameters.get(i).getTypeInfo();
    dataAggrParameterObjectInspectors[i] = TypeInfoUtils.getStandardWritableObjectInspectorFromTypeInfo(paramTypeInfo);
  }
  String[] columnNames = columns.toArray(new String[0]);
  final int dataAggrMaxKeyCount = 20000;
  final int reductionFactor = 16;
  ObjectInspector keyObjectInspector = VectorRandomRowSource.getObjectInspector(keyTypeInfo);
  /*
   * PARTIAL1.
   */
  VectorRandomRowSource partial1RowSource = new VectorRandomRowSource();
  boolean allowNull = !aggregationName.equals("bloom_filter");
  partial1RowSource.initGenerationSpecSchema(
      random, dataAggrGenerationSpecList,
      /* maxComplexDepth */ 0, allowNull, /* isUnicodeOk */ true,
      explicitDataTypePhysicalVariationList);
  Object[][] partial1RandomRows = partial1RowSource.randomRows(TEST_ROW_COUNT);
  final int partial1RowCount = partial1RandomRows.length;
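  // Overwrite the generated key column with bounded short keys so that per-key results
  // can later be indexed into a dense array of dataAggrMaxKeyCount entries (plus one
  // slot for a NULL key) when row-mode and vectorized runs are compared.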
  for (int i = 0; i < partial1RowCount; i++) {
    final short shortKey = (short) getLinearRandomNumber(random, dataAggrMaxKeyCount);
    partial1RandomRows[i][0] = ((WritableShortObjectInspector) keyObjectInspector).create((short) shortKey);
  }
  VectorRandomBatchSource partial1BatchSource = VectorRandomBatchSource.createInterestingBatches(random, partial1RowSource, partial1RandomRows, null);
  GenericUDAFEvaluator partial1Evaluator = getEvaluator(aggregationName, typeInfo);
  if (isCountStar) {
    Assert.assertTrue(partial1Evaluator instanceof GenericUDAFCountEvaluator);
    GenericUDAFCountEvaluator countEvaluator = (GenericUDAFCountEvaluator) partial1Evaluator;
    countEvaluator.setCountAllColumns(true);
  }
  /*
  System.out.println(
      "*DEBUG* GenericUDAFEvaluator for " + aggregationName + ", " + typeInfo.getTypeName() + ": " +
      partial1Evaluator.getClass().getSimpleName());
  */
  // The only way to get the return object inspector (and its return type) is to
  // initialize it...
  final GenericUDAFEvaluator.Mode partial1UdafEvaluatorMode = GenericUDAFEvaluator.Mode.PARTIAL1;
  ObjectInspector partial1ReturnOI = partial1Evaluator.init(partial1UdafEvaluatorMode, dataAggrParameterObjectInspectors);
  TypeInfo partial1OutputTypeInfo = TypeInfoUtils.getTypeInfoFromObjectInspector(partial1ReturnOI);
  Object[] partial1ResultsArray = new Object[AggregationTestMode.count];
  executeAggregationTests(aggregationName, typeInfo, partial1Evaluator, partial1OutputTypeInfo, partial1UdafEvaluatorMode, dataAggrMaxKeyCount, columns, columnNames, dataAggrParameters, partial1RandomRows, partial1RowSource, partial1BatchSource, tryDecimal64, partial1ResultsArray);
  verifyAggregationResults(typeInfo, partial1OutputTypeInfo, dataAggrMaxKeyCount, partial1UdafEvaluatorMode, partial1ResultsArray);
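  // avg and the variance family get a separate COMPLETE-mode run because their PARTIAL1
  // output (a struct of intermediate values) differs from their final scalar, so COMPLETE
  // exercises a different vectorized expression; for bloom_filter, count, max, min and sum
  // the test treats the COMPLETE expression as identical to PARTIAL1 and skips the extra run.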
  final boolean hasDifferentCompleteExpr;
  if (varianceNames.contains(aggregationName)) {
    hasDifferentCompleteExpr = true;
  } else {
    switch (aggregationName) {
    case "avg":
      hasDifferentCompleteExpr = true;
      break;
    case "bloom_filter":
    case "count":
    case "max":
    case "min":
    case "sum":
      hasDifferentCompleteExpr = false;
      break;
    default:
      throw new RuntimeException("Unexpected aggregation name " + aggregationName);
    }
  }
  if (hasDifferentCompleteExpr) {
    /*
     * COMPLETE.
     */
    VectorRandomRowSource completeRowSource = new VectorRandomRowSource();
    completeRowSource.initGenerationSpecSchema(
        random, dataAggrGenerationSpecList,
        /* maxComplexDepth */ 0, /* allowNull */ true, /* isUnicodeOk */ true,
        explicitDataTypePhysicalVariationList);
    Object[][] completeRandomRows = completeRowSource.randomRows(TEST_ROW_COUNT);
    final int completeRowCount = completeRandomRows.length;
    for (int i = 0; i < completeRowCount; i++) {
      final short shortKey = (short) getLinearRandomNumber(random, dataAggrMaxKeyCount);
      completeRandomRows[i][0] = ((WritableShortObjectInspector) keyObjectInspector).create((short) shortKey);
    }
    VectorRandomBatchSource completeBatchSource = VectorRandomBatchSource.createInterestingBatches(random, completeRowSource, completeRandomRows, null);
    GenericUDAFEvaluator completeEvaluator = getEvaluator(aggregationName, typeInfo);
    /*
    System.out.println(
        "*DEBUG* GenericUDAFEvaluator for " + aggregationName + ", " + typeInfo.getTypeName() + ": " +
        completeEvaluator.getClass().getSimpleName());
    */
    // The only way to get the return object inspector (and its return type) is to
    // initialize it...
    final GenericUDAFEvaluator.Mode completeUdafEvaluatorMode = GenericUDAFEvaluator.Mode.COMPLETE;
    ObjectInspector completeReturnOI = completeEvaluator.init(completeUdafEvaluatorMode, dataAggrParameterObjectInspectors);
    TypeInfo completeOutputTypeInfo = TypeInfoUtils.getTypeInfoFromObjectInspector(completeReturnOI);
    Object[] completeResultsArray = new Object[AggregationTestMode.count];
    executeAggregationTests(aggregationName, typeInfo, completeEvaluator, completeOutputTypeInfo, completeUdafEvaluatorMode, dataAggrMaxKeyCount, columns, columnNames, dataAggrParameters, completeRandomRows, completeRowSource, completeBatchSource, tryDecimal64, completeResultsArray);
    verifyAggregationResults(typeInfo, completeOutputTypeInfo, dataAggrMaxKeyCount, completeUdafEvaluatorMode, completeResultsArray);
  }
  final boolean hasDifferentPartial2Expr;
  if (varianceNames.contains(aggregationName)) {
    hasDifferentPartial2Expr = true;
  } else {
    switch (aggregationName) {
    case "avg":
      hasDifferentPartial2Expr = true;
      break;
    case "bloom_filter":
    case "count":
    case "max":
    case "min":
    case "sum":
      hasDifferentPartial2Expr = false;
      break;
    default:
      throw new RuntimeException("Unexpected aggregation name " + aggregationName);
    }
  }
  if (hasDifferentPartial2Expr) {
    /*
     * PARTIAL2.
     */
    final GenericUDAFEvaluator.Mode mergeUdafEvaluatorMode = GenericUDAFEvaluator.Mode.PARTIAL2;
    doMerge(mergeUdafEvaluatorMode, random, aggregationName, typeInfo, keyGenerationSpec, columns, columnNames, dataAggrMaxKeyCount, reductionFactor, partial1OutputTypeInfo, partial1ResultsArray);
  }
  final boolean hasDifferentFinalExpr;
  if (varianceNames.contains(aggregationName)) {
    hasDifferentFinalExpr = true;
  } else {
    switch (aggregationName) {
    case "avg":
      hasDifferentFinalExpr = true;
      break;
    case "bloom_filter":
    case "count":
      hasDifferentFinalExpr = true;
      break;
    case "max":
    case "min":
    case "sum":
      hasDifferentFinalExpr = false;
      break;
    default:
      throw new RuntimeException("Unexpected aggregation name " + aggregationName);
    }
  }
  if (hasDifferentFinalExpr) {
    /*
     * FINAL.
     */
    final GenericUDAFEvaluator.Mode mergeUdafEvaluatorMode = GenericUDAFEvaluator.Mode.FINAL;
    doMerge(mergeUdafEvaluatorMode, random, aggregationName, typeInfo, keyGenerationSpec, columns, columnNames, dataAggrMaxKeyCount, reductionFactor, partial1OutputTypeInfo, partial1ResultsArray);
  }
}
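To make the lifecycle above concrete, here is a minimal hand-rolled sketch (not taken from the test) of GenericUDAFCountEvaluator computing a COUNT(*) in PARTIAL1 mode, the first of the modes doTests exercises. The variable names and the three-row loop are illustrative, and the calls are assumed to run inside a method that declares throws HiveException, with the same imports the snippets above use.

  // Sketch only: COUNT(*) through the plain (non-vectorized) evaluator API.
  GenericUDAFCountEvaluator countStar = new GenericUDAFCountEvaluator();
  countStar.setCountAllColumns(true);   // COUNT(*): count every row, NULLs included

  // COUNT(*) takes no arguments, so initialize with an empty inspector array;
  // the returned object inspector describes a writable long.
  ObjectInspector countReturnOI =
      countStar.init(GenericUDAFEvaluator.Mode.PARTIAL1, new ObjectInspector[0]);

  AggregationBuffer countBuffer = countStar.getNewAggregationBuffer();
  for (int i = 0; i < 3; i++) {
    countStar.aggregate(countBuffer, new Object[0]);   // one call per input row
  }
  Object partialCount = countStar.terminatePartial(countBuffer);   // a LongWritable holding 3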
Use of org.apache.hadoop.hive.ql.udf.generic.GenericUDAFCount.GenericUDAFCountEvaluator in project hive by apache.
From the class AggregationBase, the method doRowTest:
protected static boolean doRowTest(TypeInfo typeInfo, GenericUDAFEvaluator evaluator, TypeInfo outputTypeInfo, GenericUDAFEvaluator.Mode udafEvaluatorMode, int maxKeyCount, List<String> columns, List<ExprNodeDesc> children, Object[][] randomRows, ObjectInspector rowInspector, Object[] results) throws Exception {
  // System.out.println("*ROW AGGREGATION EXPRESSION* " + evaluator.getClass().getSimpleName());
  /*
  System.out.println(
      "*DEBUG* typeInfo " + typeInfo.toString() +
      " aggregationTestMode ROW_MODE" +
      " outputTypeInfo " + outputTypeInfo.toString());
  */
  // Last entry is for a NULL key.
  AggregationBuffer[] aggregationBuffers = new AggregationBuffer[maxKeyCount + 1];
  ObjectInspector objectInspector = TypeInfoUtils.getStandardWritableObjectInspectorFromTypeInfo(outputTypeInfo);
  final boolean isCountStar;
  if (evaluator instanceof GenericUDAFCountEvaluator) {
    GenericUDAFCountEvaluator countEvaluator = (GenericUDAFCountEvaluator) evaluator;
    isCountStar = countEvaluator.getCountAllColumns();
  } else {
    isCountStar = false;
  }
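  // COUNT(*) ignores its arguments, so it is driven with an empty parameter array below;
  // every other aggregation receives the single data column (row[1]) once per row.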
  final Object[] parameterArray = isCountStar ? new Object[0] : new Object[1];
  final int rowCount = randomRows.length;
  for (int i = 0; i < rowCount; i++) {
    Object[] row = randomRows[i];
    ShortWritable shortWritable = (ShortWritable) row[0];
    final int key;
    if (shortWritable == null) {
      key = maxKeyCount;
    } else {
      key = shortWritable.get();
    }
    AggregationBuffer aggregationBuffer = aggregationBuffers[key];
    if (aggregationBuffer == null) {
      aggregationBuffer = evaluator.getNewAggregationBuffer();
      aggregationBuffers[key] = aggregationBuffer;
    }
    if (!isCountStar) {
      parameterArray[0] = row[1];
    }
    evaluator.aggregate(aggregationBuffer, parameterArray);
  }
  final boolean isPrimitive = (outputTypeInfo instanceof PrimitiveTypeInfo);
  final boolean isPartial = (udafEvaluatorMode == GenericUDAFEvaluator.Mode.PARTIAL1 || udafEvaluatorMode == GenericUDAFEvaluator.Mode.PARTIAL2);
  for (short key = 0; key < maxKeyCount + 1; key++) {
    AggregationBuffer aggregationBuffer = aggregationBuffers[key];
    if (aggregationBuffer != null) {
      final Object result;
      if (isPartial) {
        result = evaluator.terminatePartial(aggregationBuffer);
      } else {
        result = evaluator.terminate(aggregationBuffer);
      }
      Object copyResult;
      if (result == null) {
        copyResult = null;
      } else if (isPrimitive) {
        copyResult = VectorRandomRowSource.getWritablePrimitiveObject((PrimitiveTypeInfo) outputTypeInfo, objectInspector, result);
      } else {
        copyResult = ObjectInspectorUtils.copyToStandardObject(result, objectInspector, ObjectInspectorCopyOption.WRITABLE);
      }
      results[key] = copyResult;
    }
  }
  return true;
}
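doRowTest only covers the iterate/terminate side. As a complementary hand-written sketch (not part of AggregationBase), this is roughly how the merge side behaves when the same count evaluator runs in FINAL mode; the LongWritable values and names are illustrative, and the code assumes the usual Hive serde2 and Hadoop io imports plus a surrounding method that declares throws HiveException.

  // Sketch only: merging PARTIAL1 count results in FINAL mode.
  GenericUDAFCountEvaluator finalCount = new GenericUDAFCountEvaluator();
  // FINAL mode receives the PARTIAL1 output, so the single parameter is a long inspector.
  finalCount.init(GenericUDAFEvaluator.Mode.FINAL,
      new ObjectInspector[] { PrimitiveObjectInspectorFactory.writableLongObjectInspector });

  AggregationBuffer mergedBuffer = finalCount.getNewAggregationBuffer();
  finalCount.merge(mergedBuffer, new LongWritable(2));   // partial count from one task
  finalCount.merge(mergedBuffer, new LongWritable(5));   // partial count from another task
  Object total = finalCount.terminate(mergedBuffer);     // a LongWritable holding 7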