Use of org.apache.hadoop.hive.ql.exec.vector.VectorRandomRowSource in project hive by apache: class AggregationBase, method doVectorTest.
protected static boolean doVectorTest(String aggregationName, TypeInfo typeInfo, GenericUDAFEvaluator evaluator, TypeInfo outputTypeInfo, GenericUDAFEvaluator.Mode udafEvaluatorMode, int maxKeyCount, List<String> columns, String[] columnNames, TypeInfo[] typeInfos, DataTypePhysicalVariation[] dataTypePhysicalVariations, List<ExprNodeDesc> parameterList, VectorRandomBatchSource batchSource, Object[] results) throws Exception {
HiveConf hiveConf = new HiveConf();
VectorizationContext vectorizationContext = new VectorizationContext("name", columns, Arrays.asList(typeInfos), Arrays.asList(dataTypePhysicalVariations), hiveConf);
ImmutablePair<VectorAggregationDesc, String> pair = Vectorizer.getVectorAggregationDesc(aggregationName, parameterList, evaluator, outputTypeInfo, udafEvaluatorMode, vectorizationContext);
VectorAggregationDesc vecAggrDesc = pair.left;
if (vecAggrDesc == null) {
Assert.fail("No vector aggregation expression found for aggregationName " + aggregationName + " udafEvaluatorMode " + udafEvaluatorMode + " parameterList " + parameterList + " outputTypeInfo " + outputTypeInfo);
}
Class<? extends VectorAggregateExpression> vecAggrClass = vecAggrDesc.getVecAggrClass();
Constructor<? extends VectorAggregateExpression> ctor = null;
try {
ctor = vecAggrClass.getConstructor(VectorAggregationDesc.class);
} catch (Exception e) {
throw new HiveException("Constructor " + vecAggrClass.getSimpleName() + "(VectorAggregationDesc) not available");
}
VectorAggregateExpression vecAggrExpr = null;
try {
vecAggrExpr = ctor.newInstance(vecAggrDesc);
} catch (Exception e) {
throw new HiveException("Failed to create " + vecAggrClass.getSimpleName() + "(VectorAggregationDesc) object ", e);
}
VectorExpression.doTransientInit(vecAggrExpr.getInputExpression(), hiveConf);
// System.out.println("*VECTOR AGGREGATION EXPRESSION* " + vecAggrExpr.getClass().getSimpleName());
/*
System.out.println(
"*DEBUG* typeInfo " + typeInfo.toString() +
" aggregationTestMode VECTOR_MODE" +
" vecAggrExpr " + vecAggrExpr.getClass().getSimpleName());
*/
VectorRandomRowSource rowSource = batchSource.getRowSource();
VectorizedRowBatchCtx batchContext = new VectorizedRowBatchCtx(columnNames, rowSource.typeInfos(), rowSource.dataTypePhysicalVariations(), /* dataColumnNums */ null, /* partitionColumnCount */ 0, /* virtualColumnCount */ 0, /* neededVirtualColumns */ null, vectorizationContext.getScratchColumnTypeNames(), vectorizationContext.getScratchDataTypePhysicalVariations());
VectorizedRowBatch batch = batchContext.createVectorizedRowBatch();
// Last entry is for a NULL key.
VectorAggregationBufferRow[] vectorAggregationBufferRows = new VectorAggregationBufferRow[maxKeyCount + 1];
VectorAggregationBufferRow[] batchBufferRows;
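// Aggregation phase: stream every batch from the random batch source through
// the vector aggregation expression, keyed by the short values in column 0.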
batchSource.resetBatchIteration();
int rowIndex = 0;
while (true) {
if (!batchSource.fillNextBatch(batch)) {
break;
}
LongColumnVector keyLongColVector = (LongColumnVector) batch.cols[0];
batchBufferRows = new VectorAggregationBufferRow[VectorizedRowBatch.DEFAULT_SIZE];
final int size = batch.size;
boolean selectedInUse = batch.selectedInUse;
int[] selected = batch.selected;
for (int logical = 0; logical < size; logical++) {
final int batchIndex = (selectedInUse ? selected[logical] : logical);
final int keyAdjustedBatchIndex;
if (keyLongColVector.isRepeating) {
keyAdjustedBatchIndex = 0;
} else {
keyAdjustedBatchIndex = batchIndex;
}
final short key;
if (keyLongColVector.noNulls || !keyLongColVector.isNull[keyAdjustedBatchIndex]) {
key = (short) keyLongColVector.vector[keyAdjustedBatchIndex];
} else {
key = (short) maxKeyCount;
}
VectorAggregationBufferRow bufferRow = vectorAggregationBufferRows[key];
if (bufferRow == null) {
VectorAggregateExpression.AggregationBuffer aggregationBuffer = vecAggrExpr.getNewAggregationBuffer();
aggregationBuffer.reset();
VectorAggregateExpression.AggregationBuffer[] aggregationBuffers = new VectorAggregateExpression.AggregationBuffer[] { aggregationBuffer };
bufferRow = new VectorAggregationBufferRow(aggregationBuffers);
vectorAggregationBufferRows[key] = bufferRow;
}
batchBufferRows[logical] = bufferRow;
}
vecAggrExpr.aggregateInputSelection(batchBufferRows, 0, batch);
rowIndex += batch.size;
}
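// Output phase: evaluate each key's aggregation buffer into a one-column
// output batch and extract the final result object per key.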
String[] outputColumnNames = new String[] { "output" };
TypeInfo[] outputTypeInfos = new TypeInfo[] { outputTypeInfo };
VectorizedRowBatchCtx outputBatchContext = new VectorizedRowBatchCtx(outputColumnNames, outputTypeInfos, new DataTypePhysicalVariation[] { vecAggrExpr.getOutputDataTypePhysicalVariation() }, /* dataColumnNums */ null, /* partitionColumnCount */ 0, /* virtualColumnCount */ 0, /* neededVirtualColumns */ null, new String[0], new DataTypePhysicalVariation[0]);
VectorizedRowBatch outputBatch = outputBatchContext.createVectorizedRowBatch();
short[] keys = new short[VectorizedRowBatch.DEFAULT_SIZE];
VectorExtractRow resultVectorExtractRow = new VectorExtractRow();
resultVectorExtractRow.init(new TypeInfo[] { outputTypeInfo }, new int[] { 0 });
Object[] scratchRow = new Object[1];
for (short key = 0; key < maxKeyCount + 1; key++) {
VectorAggregationBufferRow vectorAggregationBufferRow = vectorAggregationBufferRows[key];
if (vectorAggregationBufferRow != null) {
if (outputBatch.size == VectorizedRowBatch.DEFAULT_SIZE) {
extractResultObjects(outputBatch, keys, resultVectorExtractRow, outputTypeInfo, scratchRow, results);
outputBatch.reset();
}
keys[outputBatch.size] = key;
VectorAggregateExpression.AggregationBuffer aggregationBuffer = vectorAggregationBufferRow.getAggregationBuffer(0);
vecAggrExpr.assignRowColumn(outputBatch, outputBatch.size++, 0, aggregationBuffer);
}
}
if (outputBatch.size > 0) {
extractResultObjects(outputBatch, keys, resultVectorExtractRow, outputTypeInfo, scratchRow, results);
}
return true;
}
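For orientation, a caller resolves a GenericUDAFEvaluator and passes it to doVectorTest together with the column and batch plumbing built by the test harness. A minimal sketch, assuming a single bigint parameter column named "col0"; maxKeyCount, columns, columnNames, typeInfos, dataTypePhysicalVariations, batchSource, and results are assumed to be prepared as in the surrounding tests.
// Hypothetical invocation sketch; "sum" is only an example aggregation name.
TypeInfo longTypeInfo = TypeInfoFactory.longTypeInfo;
ObjectInspector longOI = TypeInfoUtils.getStandardWritableObjectInspectorFromTypeInfo(longTypeInfo);
GenericUDAFEvaluator evaluator = FunctionRegistry.getGenericUDAFEvaluator("sum", Arrays.asList(longOI), false, false);
List<ExprNodeDesc> parameterList = new ArrayList<ExprNodeDesc>();
parameterList.add(new ExprNodeColumnDesc(longTypeInfo, "col0", "table", false));
boolean ok = doVectorTest("sum", longTypeInfo, evaluator, longTypeInfo, GenericUDAFEvaluator.Mode.PARTIAL1, maxKeyCount, columns, columnNames, typeInfos, dataTypePhysicalVariations, parameterList, batchSource, results);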
Use of org.apache.hadoop.hive.ql.exec.vector.VectorRandomRowSource in project hive by apache: class TestVectorBetweenIn, method doVectorBetweenInTest.
private boolean doVectorBetweenInTest(TypeInfo typeInfo, BetweenInVariation betweenInVariation, List<Object> compareList, List<String> columns, String[] columnNames, TypeInfo[] typeInfos, DataTypePhysicalVariation[] dataTypePhysicalVariations, List<ExprNodeDesc> children, GenericUDF udf, ExprNodeGenericFuncDesc exprDesc, BetweenInTestMode betweenInTestMode, VectorRandomBatchSource batchSource, ObjectInspector objectInspector, TypeInfo outputTypeInfo, Object[] resultObjects) throws Exception {
HiveConf hiveConf = new HiveConf();
if (betweenInTestMode == BetweenInTestMode.ADAPTOR) {
hiveConf.setBoolVar(HiveConf.ConfVars.HIVE_TEST_VECTOR_ADAPTOR_OVERRIDE, true);
}
final boolean isFilter = betweenInVariation.isFilter;
VectorizationContext vectorizationContext = new VectorizationContext("name", columns, Arrays.asList(typeInfos), Arrays.asList(dataTypePhysicalVariations), hiveConf);
VectorExpression vectorExpression = vectorizationContext.getVectorExpression(exprDesc, (isFilter ? VectorExpressionDescriptor.Mode.FILTER : VectorExpressionDescriptor.Mode.PROJECTION));
vectorExpression.transientInit(hiveConf);
if (betweenInTestMode == BetweenInTestMode.VECTOR_EXPRESSION) {
String vecExprString = vectorExpression.toString();
if (vectorExpression instanceof VectorUDFAdaptor) {
System.out.println("*NO NATIVE VECTOR EXPRESSION* typeInfo " + typeInfo.toString() + " betweenInTestMode " + betweenInTestMode + " betweenInVariation " + betweenInVariation + " vectorExpression " + vecExprString);
} else if (dataTypePhysicalVariations[0] == DataTypePhysicalVariation.DECIMAL_64) {
final String nameToCheck = vectorExpression.getClass().getSimpleName();
if (!nameToCheck.contains("Decimal64")) {
System.out.println("*EXPECTED DECIMAL_64 VECTOR EXPRESSION* typeInfo " + typeInfo.toString() + " betweenInTestMode " + betweenInTestMode + " betweenInVariation " + betweenInVariation + " vectorExpression " + vecExprString);
}
}
}
// System.out.println("*VECTOR EXPRESSION* " + vectorExpression.getClass().getSimpleName());
/*
System.out.println(
"*DEBUG* typeInfo " + typeInfo.toString() +
" betweenInTestMode " + betweenInTestMode +
" betweenInVariation " + betweenInVariation +
" vectorExpression " + vectorExpression.toString());
*/
VectorRandomRowSource rowSource = batchSource.getRowSource();
VectorizedRowBatchCtx batchContext = new VectorizedRowBatchCtx(columnNames, rowSource.typeInfos(), rowSource.dataTypePhysicalVariations(), /* dataColumnNums */ null, /* partitionColumnCount */ 0, /* virtualColumnCount */ 0, /* neededVirtualColumns */ null, vectorizationContext.getScratchColumnTypeNames(), vectorizationContext.getScratchDataTypePhysicalVariations());
VectorizedRowBatch batch = batchContext.createVectorizedRowBatch();
VectorExtractRow resultVectorExtractRow = null;
Object[] scratchRow = null;
if (!isFilter) {
resultVectorExtractRow = new VectorExtractRow();
final int outputColumnNum = vectorExpression.getOutputColumnNum();
resultVectorExtractRow.init(new TypeInfo[] { outputTypeInfo }, new int[] { outputColumnNum });
scratchRow = new Object[1];
}
boolean copySelectedInUse = false;
int[] copySelected = new int[VectorizedRowBatch.DEFAULT_SIZE];
batchSource.resetBatchIteration();
int rowIndex = 0;
while (true) {
if (!batchSource.fillNextBatch(batch)) {
break;
}
final int originalBatchSize = batch.size;
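// For filters, evaluate() compacts batch.selected in place, so capture the
// original selection before it shrinks.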
if (isFilter) {
copySelectedInUse = batch.selectedInUse;
if (batch.selectedInUse) {
System.arraycopy(batch.selected, 0, copySelected, 0, originalBatchSize);
}
}
// In filter mode, the batch size can be made smaller.
vectorExpression.evaluate(batch);
if (!isFilter) {
extractResultObjects(batch, rowIndex, resultVectorExtractRow, scratchRow, objectInspector, resultObjects);
} else {
final int currentBatchSize = batch.size;
if (copySelectedInUse && batch.selectedInUse) {
int selectIndex = 0;
for (int i = 0; i < originalBatchSize; i++) {
final int originalBatchIndex = copySelected[i];
final boolean booleanResult;
if (selectIndex < currentBatchSize && batch.selected[selectIndex] == originalBatchIndex) {
booleanResult = true;
selectIndex++;
} else {
booleanResult = false;
}
resultObjects[rowIndex + i] = new BooleanWritable(booleanResult);
}
} else if (batch.selectedInUse) {
int selectIndex = 0;
for (int i = 0; i < originalBatchSize; i++) {
final boolean booleanResult;
if (selectIndex < currentBatchSize && batch.selected[selectIndex] == i) {
booleanResult = true;
selectIndex++;
} else {
booleanResult = false;
}
resultObjects[rowIndex + i] = new BooleanWritable(booleanResult);
}
} else if (currentBatchSize == 0) {
// Whole batch got zapped.
for (int i = 0; i < originalBatchSize; i++) {
resultObjects[rowIndex + i] = new BooleanWritable(false);
}
} else {
// Every row kept.
for (int i = 0; i < originalBatchSize; i++) {
resultObjects[rowIndex + i] = new BooleanWritable(true);
}
}
}
rowIndex += originalBatchSize;
}
return true;
}
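The filter bookkeeping above is a two-pointer walk over the selection arrays before and after evaluation. A standalone sketch of that reconciliation (hypothetical helper, not part of the Hive test):
// Both arrays list batch indices in ascending order; a row was kept iff its
// original index survives into the current selection.
static boolean[] reconcileFilterResults(int[] originalSelected, int originalSize, int[] currentSelected, int currentSize) {
  boolean[] kept = new boolean[originalSize];
  int selectIndex = 0;
  for (int i = 0; i < originalSize; i++) {
    if (selectIndex < currentSize && currentSelected[selectIndex] == originalSelected[i]) {
      kept[i] = true;
      selectIndex++;
    }
  }
  return kept;
}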
Use of org.apache.hadoop.hive.ql.exec.vector.VectorRandomRowSource in project hive by apache: class TestVectorBetweenIn, method doBetweenInVariation.
private boolean doBetweenInVariation(Random random, String typeName, boolean tryDecimal64, BetweenInVariation betweenInVariation, int subVariation) throws Exception {
TypeInfo typeInfo = TypeInfoUtils.getTypeInfoFromTypeString(typeName);
boolean isDecimal64 = checkDecimal64(tryDecimal64, typeInfo);
DataTypePhysicalVariation dataTypePhysicalVariation = (isDecimal64 ? DataTypePhysicalVariation.DECIMAL_64 : DataTypePhysicalVariation.NONE);
final int decimal64Scale = (isDecimal64 ? ((DecimalTypeInfo) typeInfo).getScale() : 0);
// ----------------------------------------------------------------------------------------------
ObjectInspector objectInspector = TypeInfoUtils.getStandardWritableObjectInspectorFromTypeInfo(typeInfo);
final int valueCount = 10 + random.nextInt(10);
List<Object> valueList = new ArrayList<Object>(valueCount);
for (int i = 0; i < valueCount; i++) {
valueList.add(VectorRandomRowSource.randomWritable(random, typeInfo, objectInspector, dataTypePhysicalVariation, /* allowNull */ false));
}
final boolean isBetween = (betweenInVariation == BetweenInVariation.FILTER_BETWEEN || betweenInVariation == BetweenInVariation.FILTER_NOT_BETWEEN || betweenInVariation == BetweenInVariation.PROJECTION_BETWEEN || betweenInVariation == BetweenInVariation.PROJECTION_NOT_BETWEEN);
List<Object> compareList = new ArrayList<Object>();
List<Object> sortedList = new ArrayList<Object>(valueCount);
sortedList.addAll(valueList);
Object exampleObject = valueList.get(0);
WritableComparator writableComparator = WritableComparator.get((Class<? extends WritableComparable>) exampleObject.getClass());
sortedList.sort(writableComparator);
final boolean isInvert;
if (isBetween) {
// FILTER_BETWEEN
// FILTER_NOT_BETWEEN
// PROJECTION_BETWEEN
// PROJECTION_NOT_BETWEEN
isInvert = (betweenInVariation == BetweenInVariation.FILTER_NOT_BETWEEN || betweenInVariation == BetweenInVariation.PROJECTION_NOT_BETWEEN);
switch(subVariation) {
case 0:
// Range covers all values exactly.
compareList.add(sortedList.get(0));
compareList.add(sortedList.get(valueCount - 1));
break;
case 1:
// Exclude the first and last sorted.
compareList.add(sortedList.get(1));
compareList.add(sortedList.get(valueCount - 2));
break;
case 2:
// Only last 2 sorted.
compareList.add(sortedList.get(valueCount - 2));
compareList.add(sortedList.get(valueCount - 1));
break;
case 3:
case 4:
case 5:
case 6:
{
// Choose 2 adjacent in the middle.
Object min = sortedList.get(5);
Object max = sortedList.get(6);
compareList.add(min);
compareList.add(max);
if (subVariation == 4) {
removeValue(valueList, min);
} else if (subVariation == 5) {
removeValue(valueList, max);
} else if (subVariation == 6) {
removeValue(valueList, min);
removeValue(valueList, max);
}
}
break;
default:
return false;
}
} else {
// FILTER_IN.
// PROJECTION_IN.
isInvert = false;
switch(subVariation) {
case 0:
// All values.
compareList.addAll(valueList);
break;
case 1:
// Don't include the first and last sorted.
for (int i = 1; i < valueCount - 1; i++) {
compareList.add(valueList.get(i));
}
break;
case 2:
// Every second value (even indices, skipping the first).
for (int i = 2; i < valueCount; i += 2) {
compareList.add(valueList.get(i));
}
break;
case 3:
{
// Choose 2 adjacent in the middle.
Object min = sortedList.get(5);
Object max = sortedList.get(6);
compareList.add(min);
compareList.add(max);
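// Only subVariation 3 reaches this block (4..6 fall through to default and
// return false), so the removeValue branches below never execute here.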
if (subVariation == 4) {
removeValue(valueList, min);
} else if (subVariation == 5) {
removeValue(valueList, max);
} else if (subVariation == 6) {
removeValue(valueList, min);
removeValue(valueList, max);
}
}
break;
default:
return false;
}
}
// ----------------------------------------------------------------------------------------------
GenerationSpec generationSpec = GenerationSpec.createValueList(typeInfo, valueList);
List<GenerationSpec> generationSpecList = new ArrayList<GenerationSpec>();
List<DataTypePhysicalVariation> explicitDataTypePhysicalVariationList = new ArrayList<DataTypePhysicalVariation>();
generationSpecList.add(generationSpec);
explicitDataTypePhysicalVariationList.add(dataTypePhysicalVariation);
VectorRandomRowSource rowSource = new VectorRandomRowSource();
rowSource.initGenerationSpecSchema(random, generationSpecList, /* maxComplexDepth */ 0, /* allowNull */ true, /* isUnicodeOk */ true, explicitDataTypePhysicalVariationList);
List<String> columns = new ArrayList<String>();
String col1Name = rowSource.columnNames().get(0);
columns.add(col1Name);
final ExprNodeDesc col1Expr = new ExprNodeColumnDesc(typeInfo, col1Name, "table", false);
List<ExprNodeDesc> children = new ArrayList<ExprNodeDesc>();
if (isBetween) {
children.add(new ExprNodeConstantDesc(Boolean.valueOf(isInvert)));
}
children.add(col1Expr);
for (Object compareObject : compareList) {
ExprNodeConstantDesc constDesc = new ExprNodeConstantDesc(typeInfo, VectorRandomRowSource.getNonWritableObject(compareObject, typeInfo, objectInspector));
children.add(constDesc);
}
String[] columnNames = columns.toArray(new String[0]);
Object[][] randomRows = rowSource.randomRows(100000);
VectorRandomBatchSource batchSource = VectorRandomBatchSource.createInterestingBatches(random, rowSource, randomRows, null);
final GenericUDF udf;
final ObjectInspector outputObjectInspector;
if (isBetween) {
udf = new GenericUDFBetween();
// First argument is boolean invert. Arguments 1..3 are inspectors for range limits...
ObjectInspector[] argumentOIs = new ObjectInspector[4];
argumentOIs[0] = PrimitiveObjectInspectorFactory.writableBooleanObjectInspector;
argumentOIs[1] = objectInspector;
argumentOIs[2] = objectInspector;
argumentOIs[3] = objectInspector;
outputObjectInspector = udf.initialize(argumentOIs);
} else {
final int compareCount = compareList.size();
udf = new GenericUDFIn();
ObjectInspector[] argumentOIs = new ObjectInspector[compareCount];
ConstantObjectInspector constantObjectInspector = (ConstantObjectInspector) children.get(1).getWritableObjectInspector();
for (int i = 0; i < compareCount; i++) {
argumentOIs[i] = constantObjectInspector;
}
outputObjectInspector = udf.initialize(argumentOIs);
}
TypeInfo outputTypeInfo = TypeInfoUtils.getTypeInfoFromObjectInspector(outputObjectInspector);
ExprNodeGenericFuncDesc exprDesc = new ExprNodeGenericFuncDesc(TypeInfoFactory.booleanTypeInfo, udf, children);
return executeTestModesAndVerify(typeInfo, betweenInVariation, compareList, columns, columnNames, children, udf, exprDesc, randomRows, rowSource, batchSource, outputTypeInfo, /* skipAdaptor */ false);
}
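The BETWEEN children list above is positional: invert flag, column, lower bound, upper bound. A hypothetical standalone example building "col1 BETWEEN 10 AND 20" over an int column:
List<ExprNodeDesc> betweenChildren = new ArrayList<ExprNodeDesc>();
// invert = false means plain BETWEEN; true would mean NOT BETWEEN.
betweenChildren.add(new ExprNodeConstantDesc(Boolean.FALSE));
betweenChildren.add(new ExprNodeColumnDesc(TypeInfoFactory.intTypeInfo, "col1", "table", false));
betweenChildren.add(new ExprNodeConstantDesc(TypeInfoFactory.intTypeInfo, 10));
betweenChildren.add(new ExprNodeConstantDesc(TypeInfoFactory.intTypeInfo, 20));
ExprNodeGenericFuncDesc betweenDesc = new ExprNodeGenericFuncDesc(TypeInfoFactory.booleanTypeInfo, new GenericUDFBetween(), betweenChildren);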
Use of org.apache.hadoop.hive.ql.exec.vector.VectorRandomRowSource in project hive by apache: class TestVectorBetweenIn, method doBetweenStructInVariation.
private boolean doBetweenStructInVariation(Random random, String structTypeName, BetweenInVariation betweenInVariation) throws Exception {
StructTypeInfo structTypeInfo = (StructTypeInfo) TypeInfoUtils.getTypeInfoFromTypeString(structTypeName);
ObjectInspector structObjectInspector = TypeInfoUtils.getStandardWritableObjectInspectorFromTypeInfo(structTypeInfo);
final int valueCount = 10 + random.nextInt(10);
List<Object> valueList = new ArrayList<Object>(valueCount);
for (int i = 0; i < valueCount; i++) {
valueList.add(VectorRandomRowSource.randomWritable(random, structTypeInfo, structObjectInspector, DataTypePhysicalVariation.NONE, /* allowNull */ false));
}
final boolean isInvert = false;
// No convenient WritableComparator / WritableComparable available for STRUCT.
List<Object> compareList = new ArrayList<Object>();
Set<Integer> includedSet = new HashSet<Integer>();
final int chooseLimit = 4 + random.nextInt(valueCount / 2);
int chooseCount = 0;
while (chooseCount < chooseLimit) {
final int index = random.nextInt(valueCount);
if (includedSet.contains(index)) {
continue;
}
includedSet.add(index);
compareList.add(valueList.get(index));
chooseCount++;
}
// ----------------------------------------------------------------------------------------------
GenerationSpec structGenerationSpec = GenerationSpec.createValueList(structTypeInfo, valueList);
List<GenerationSpec> structGenerationSpecList = new ArrayList<GenerationSpec>();
List<DataTypePhysicalVariation> structExplicitDataTypePhysicalVariationList = new ArrayList<DataTypePhysicalVariation>();
structGenerationSpecList.add(structGenerationSpec);
structExplicitDataTypePhysicalVariationList.add(DataTypePhysicalVariation.NONE);
VectorRandomRowSource structRowSource = new VectorRandomRowSource();
structRowSource.initGenerationSpecSchema(random, structGenerationSpecList, /* maxComplexDepth */ 0, /* allowNull */ true, /* isUnicodeOk */ true, structExplicitDataTypePhysicalVariationList);
Object[][] structRandomRows = structRowSource.randomRows(100000);
// ---------------------------------------------------------------------------------------------
List<GenerationSpec> generationSpecList = new ArrayList<GenerationSpec>();
List<DataTypePhysicalVariation> explicitDataTypePhysicalVariationList = new ArrayList<DataTypePhysicalVariation>();
List<TypeInfo> fieldTypeInfoList = structTypeInfo.getAllStructFieldTypeInfos();
final int fieldCount = fieldTypeInfoList.size();
for (int i = 0; i < fieldCount; i++) {
GenerationSpec generationSpec = GenerationSpec.createOmitGeneration(fieldTypeInfoList.get(i));
generationSpecList.add(generationSpec);
explicitDataTypePhysicalVariationList.add(DataTypePhysicalVariation.NONE);
}
VectorRandomRowSource rowSource = new VectorRandomRowSource();
rowSource.initGenerationSpecSchema(random, generationSpecList, /* maxComplexDepth */ 0, /* allowNull */ true, /* isUnicodeOk */ true, explicitDataTypePhysicalVariationList);
Object[][] randomRows = rowSource.randomRows(100000);
final int rowCount = randomRows.length;
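// Splice each generated struct's field values into the flat per-field columns
// so that rows line up with the struct constants in compareList.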
for (int r = 0; r < rowCount; r++) {
List<Object> fieldValueList = (ArrayList) structRandomRows[r][0];
for (int f = 0; f < fieldCount; f++) {
randomRows[r][f] = fieldValueList.get(f);
}
}
// ---------------------------------------------------------------------------------------------
// Currently, STRUCT IN vectorization assumes a GenericUDFStruct.
List<ObjectInspector> structUdfObjectInspectorList = new ArrayList<ObjectInspector>();
List<ExprNodeDesc> structUdfChildren = new ArrayList<ExprNodeDesc>(fieldCount);
List<String> rowColumnNameList = rowSource.columnNames();
for (int i = 0; i < fieldCount; i++) {
TypeInfo fieldTypeInfo = fieldTypeInfoList.get(i);
ExprNodeColumnDesc fieldExpr = new ExprNodeColumnDesc(fieldTypeInfo, rowColumnNameList.get(i), "table", false);
structUdfChildren.add(fieldExpr);
ObjectInspector fieldObjectInspector = VectorRandomRowSource.getObjectInspector(fieldTypeInfo, DataTypePhysicalVariation.NONE);
structUdfObjectInspectorList.add(fieldObjectInspector);
}
StandardStructObjectInspector structUdfObjectInspector = ObjectInspectorFactory.getStandardStructObjectInspector(rowColumnNameList, structUdfObjectInspectorList);
String structUdfTypeName = structUdfObjectInspector.getTypeName();
TypeInfo structUdfTypeInfo = TypeInfoUtils.getTypeInfoFromTypeString(structUdfTypeName);
String structFuncText = "struct";
FunctionInfo fi = FunctionRegistry.getFunctionInfo(structFuncText);
GenericUDF genericUDF = fi.getGenericUDF();
ExprNodeDesc col1Expr = new ExprNodeGenericFuncDesc(structUdfObjectInspector, genericUDF, structFuncText, structUdfChildren);
// ---------------------------------------------------------------------------------------------
List<String> columns = new ArrayList<String>();
List<ExprNodeDesc> children = new ArrayList<ExprNodeDesc>();
children.add(col1Expr);
for (int i = 0; i < compareList.size(); i++) {
Object compareObject = compareList.get(i);
ExprNodeConstantDesc constDesc = new ExprNodeConstantDesc(structUdfTypeInfo, VectorRandomRowSource.getNonWritableObject(compareObject, structUdfTypeInfo, structUdfObjectInspector));
children.add(constDesc);
}
for (int i = 0; i < fieldCount; i++) {
columns.add(rowColumnNameList.get(i));
}
String[] columnNames = columns.toArray(new String[0]);
VectorRandomBatchSource batchSource = VectorRandomBatchSource.createInterestingBatches(random, rowSource, randomRows, null);
// ---------------------------------------------------------------------------------------------
final GenericUDF udf = new GenericUDFIn();
final int compareCount = compareList.size();
ObjectInspector[] argumentOIs = new ObjectInspector[compareCount];
for (int i = 0; i < compareCount; i++) {
argumentOIs[i] = structUdfObjectInspector;
}
final ObjectInspector outputObjectInspector = udf.initialize(argumentOIs);
TypeInfo outputTypeInfo = TypeInfoUtils.getTypeInfoFromObjectInspector(outputObjectInspector);
ExprNodeGenericFuncDesc exprDesc = new ExprNodeGenericFuncDesc(TypeInfoFactory.booleanTypeInfo, udf, children);
return executeTestModesAndVerify(structUdfTypeInfo, betweenInVariation, compareList, columns, columnNames, children, udf, exprDesc, randomRows, rowSource, batchSource, outputTypeInfo, /* skipAdaptor */ true);
}
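For reference, the expression assembled here has the shape of a multi-column IN predicate; a hypothetical two-field illustration:
// WHERE struct(field0, field1) IN (struct(1, 'a'), struct(2, 'b'), ...)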
Use of org.apache.hadoop.hive.ql.exec.vector.VectorRandomRowSource in project hive by apache: class TestVectorCastStatement, method doIfTestOneCast.
private void doIfTestOneCast(Random random, String typeName, DataTypePhysicalVariation dataTypePhysicalVariation, PrimitiveCategory targetPrimitiveCategory) throws Exception {
TypeInfo typeInfo = TypeInfoUtils.getTypeInfoFromTypeString(typeName);
PrimitiveCategory primitiveCategory = ((PrimitiveTypeInfo) typeInfo).getPrimitiveCategory();
boolean isDecimal64 = (dataTypePhysicalVariation == DataTypePhysicalVariation.DECIMAL_64);
final int decimal64Scale = (isDecimal64 ? ((DecimalTypeInfo) typeInfo).getScale() : 0);
// ----------------------------------------------------------------------------------------------
String targetTypeName;
if (targetPrimitiveCategory == PrimitiveCategory.BYTE) {
targetTypeName = "tinyint";
} else if (targetPrimitiveCategory == PrimitiveCategory.SHORT) {
targetTypeName = "smallint";
} else if (targetPrimitiveCategory == PrimitiveCategory.LONG) {
targetTypeName = "bigint";
} else {
targetTypeName = targetPrimitiveCategory.name().toLowerCase();
}
targetTypeName = VectorRandomRowSource.getDecoratedTypeName(random, targetTypeName);
TypeInfo targetTypeInfo = TypeInfoUtils.getTypeInfoFromTypeString(targetTypeName);
// ----------------------------------------------------------------------------------------------
GenerationSpec generationSpec;
if (needsValidDataTypeData(targetTypeInfo) && (primitiveCategory == PrimitiveCategory.STRING || primitiveCategory == PrimitiveCategory.CHAR || primitiveCategory == PrimitiveCategory.VARCHAR)) {
generationSpec = GenerationSpec.createStringFamilyOtherTypeValue(typeInfo, targetTypeInfo);
} else {
generationSpec = GenerationSpec.createSameType(typeInfo);
}
List<GenerationSpec> generationSpecList = new ArrayList<GenerationSpec>();
List<DataTypePhysicalVariation> explicitDataTypePhysicalVariationList = new ArrayList<DataTypePhysicalVariation>();
generationSpecList.add(generationSpec);
explicitDataTypePhysicalVariationList.add(dataTypePhysicalVariation);
VectorRandomRowSource rowSource = new VectorRandomRowSource();
rowSource.initGenerationSpecSchema(random, generationSpecList, /* maxComplexDepth */ 0, /* allowNull */ true, /* isUnicodeOk */ true, explicitDataTypePhysicalVariationList);
List<String> columns = new ArrayList<String>();
columns.add("col1");
ExprNodeColumnDesc col1Expr = new ExprNodeColumnDesc(typeInfo, "col1", "table", false);
List<ExprNodeDesc> children = new ArrayList<ExprNodeDesc>();
children.add(col1Expr);
String[] columnNames = columns.toArray(new String[0]);
Object[][] randomRows = rowSource.randomRows(100000);
VectorRandomBatchSource batchSource = VectorRandomBatchSource.createInterestingBatches(random, rowSource, randomRows, null);
final int rowCount = randomRows.length;
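// Run the cast once per test mode (row mode, adaptor, vector expression),
// collecting per-row results so vector modes can be checked against row mode.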
Object[][] resultObjectsArray = new Object[CastStmtTestMode.count][];
for (int i = 0; i < CastStmtTestMode.count; i++) {
Object[] resultObjects = new Object[rowCount];
resultObjectsArray[i] = resultObjects;
CastStmtTestMode castStmtTestMode = CastStmtTestMode.values()[i];
switch(castStmtTestMode) {
case ROW_MODE:
if (!doRowCastTest(typeInfo, targetTypeInfo, columns, children, randomRows, rowSource.rowStructObjectInspector(), resultObjects)) {
return;
}
break;
case ADAPTOR:
case VECTOR_EXPRESSION:
if (!doVectorCastTest(typeInfo, targetTypeInfo, columns, columnNames, rowSource.typeInfos(), rowSource.dataTypePhysicalVariations(), children, castStmtTestMode, batchSource, resultObjects)) {
return;
}
break;
default:
throw new RuntimeException("Unexpected IF statement test mode " + ifStmtTestMode);
}
}
for (int i = 0; i < rowCount; i++) {
// Row-mode is the expected value.
Object expectedResult = resultObjectsArray[0][i];
for (int v = 1; v < CastStmtTestMode.count; v++) {
Object vectorResult = resultObjectsArray[v][i];
if (expectedResult == null || vectorResult == null) {
if (expectedResult != null || vectorResult != null) {
Assert.fail("Row " + i + " sourceTypeName " + typeName + " targetTypeName " + targetTypeName + " " + CastStmtTestMode.values()[v] + " result is NULL " + (vectorResult == null ? "YES" : "NO result " + vectorResult.toString()) + " does not match row-mode expected result is NULL " + (expectedResult == null ? "YES" : "NO result " + expectedResult.toString()) + " row values " + Arrays.toString(randomRows[i]));
}
} else {
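// With DECIMAL_64 input, the row-mode expected value may be a raw LongWritable;
// reinterpret it at the decimal64 scale so it compares with the vector result.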
if (isDecimal64 && expectedResult instanceof LongWritable) {
HiveDecimalWritable expectedHiveDecimalWritable = new HiveDecimalWritable(0);
expectedHiveDecimalWritable.deserialize64(((LongWritable) expectedResult).get(), decimal64Scale);
expectedResult = expectedHiveDecimalWritable;
}
if (!expectedResult.equals(vectorResult)) {
Assert.fail("Row " + i + " sourceTypeName " + typeName + " targetTypeName " + targetTypeName + " " + CastStmtTestMode.values()[v] + " result " + vectorResult.toString() + " (" + vectorResult.getClass().getSimpleName() + ")" + " does not match row-mode expected result " + expectedResult.toString() + " (" + expectedResult.getClass().getSimpleName() + ")" + " row values " + Arrays.toString(randomRows[i]));
}
}
}
}
}