Use of org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory.booleanTypeInfo in project hive by apache.
The class VectorizationContext, method getCastToBooleanExpression.
private VectorExpression getCastToBooleanExpression(List<ExprNodeDesc> childExpr,
    VectorExpressionDescriptor.Mode mode) throws HiveException {
  ExprNodeDesc child = childExpr.get(0);
  TypeInfo inputTypeInfo = child.getTypeInfo();
  String inputType = inputTypeInfo.toString();
  if (child instanceof ExprNodeConstantDesc) {
    if (null == ((ExprNodeConstantDesc) child).getValue()) {
      return getConstantVectorExpression(null, TypeInfoFactory.booleanTypeInfo, mode);
    }
    // Family of related JIRAs: HIVE-7421, HIVE-7422, and HIVE-7424.
    return null;
  }
  VectorExpression ve;
  // Long and double are handled using descriptors; string needs to be specially handled.
  if (isStringFamily(inputType)) {
    ve = createVectorExpression(CastStringToBoolean.class, childExpr,
        VectorExpressionDescriptor.Mode.PROJECTION, TypeInfoFactory.booleanTypeInfo,
        DataTypePhysicalVariation.NONE);
  } else {
    // Ok, try the UDF.
    ve = getVectorExpressionForUdf(null, UDFToBoolean.class, childExpr,
        VectorExpressionDescriptor.Mode.PROJECTION, TypeInfoFactory.booleanTypeInfo);
  }
  if (ve == null || mode == VectorExpressionDescriptor.Mode.PROJECTION) {
    return ve;
  }
  // FILTER mode: wrap the projection so its boolean output column drives row selection.
  int outputColumnNum = ve.getOutputColumnNum();
  SelectColumnIsTrue filterVectorExpr = new SelectColumnIsTrue(outputColumnNum);
  filterVectorExpr.setChildExpressions(new VectorExpression[] { ve });
  filterVectorExpr.setInputTypeInfos(ve.getOutputTypeInfo());
  filterVectorExpr.setInputDataTypePhysicalVariations(DataTypePhysicalVariation.NONE);
  return filterVectorExpr;
}
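Every snippet on this page passes the shared TypeInfoFactory.booleanTypeInfo singleton rather than constructing a type object. A minimal sketch of that contract, assuming only hive-serde on the classpath (the class name BooleanTypeInfoDemo is made up for illustration):

import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;

public class BooleanTypeInfoDemo {
  public static void main(String[] args) {
    // The factory exposes shared singletons for the primitive types.
    TypeInfo bool = TypeInfoFactory.booleanTypeInfo;
    System.out.println(bool.getTypeName()); // prints "boolean"
    // Lookups by type name return the cached instance, which is why identity
    // comparisons on primitive TypeInfos (as in the resolver below) are safe.
    System.out.println(bool == TypeInfoFactory.getPrimitiveTypeInfo("boolean")); // true
  }
}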
Use of org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory.booleanTypeInfo in project hive by apache.
The class ComparisonOpMethodResolver, method getEvalMethod.
/*
 * (non-Javadoc)
 *
 * @see org.apache.hadoop.hive.ql.exec.UDFMethodResolver#getEvalMethod(java.util.List)
 */
@Override
public Method getEvalMethod(List<TypeInfo> argTypeInfos) throws UDFArgumentException {
  assert (argTypeInfos.size() == 2);
  List<TypeInfo> pTypeInfos = null;
  if (argTypeInfos.get(0).equals(TypeInfoFactory.voidTypeInfo)
      || argTypeInfos.get(1).equals(TypeInfoFactory.voidTypeInfo)) {
    pTypeInfos = new ArrayList<TypeInfo>();
    pTypeInfos.add(TypeInfoFactory.doubleTypeInfo);
    pTypeInfos.add(TypeInfoFactory.doubleTypeInfo);
  } else if (argTypeInfos.get(0).equals(TypeInfoFactory.booleanTypeInfo)
      && argTypeInfos.get(1).equals(TypeInfoFactory.booleanTypeInfo)) {
    pTypeInfos = new ArrayList<TypeInfo>();
    pTypeInfos.add(TypeInfoFactory.intTypeInfo);
    pTypeInfos.add(TypeInfoFactory.intTypeInfo);
  } else if (argTypeInfos.get(0) == argTypeInfos.get(1)) {
    pTypeInfos = argTypeInfos;
  } else {
    pTypeInfos = new ArrayList<TypeInfo>();
    pTypeInfos.add(TypeInfoFactory.doubleTypeInfo);
    pTypeInfos.add(TypeInfoFactory.doubleTypeInfo);
  }
  Method udfMethod = null;
  List<Method> evaluateMethods = new ArrayList<Method>();
  for (Method m : Arrays.asList(udfClass.getMethods())) {
    if (m.getName().equals("evaluate")) {
      evaluateMethods.add(m);
      List<TypeInfo> acceptedTypeInfos = TypeInfoUtils.getParameterTypeInfos(m, pTypeInfos.size());
      if (acceptedTypeInfos == null) {
        // null means the method does not accept the number of arguments passed.
        continue;
      }
      boolean match = (acceptedTypeInfos.size() == pTypeInfos.size());
      for (int i = 0; i < pTypeInfos.size() && match; i++) {
        TypeInfo accepted = acceptedTypeInfos.get(i);
        if (accepted != pTypeInfos.get(i)) {
          match = false;
        }
      }
      if (match) {
        if (udfMethod != null) {
          throw new AmbiguousMethodException(udfClass, argTypeInfos,
              Arrays.asList(new Method[] { udfMethod, m }));
        } else {
          udfMethod = m;
        }
      }
    }
  }
  if (udfMethod == null) {
    throw new NoMatchingMethodException(udfClass, argTypeInfos, evaluateMethods);
  }
  return udfMethod;
}
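The resolver's branching can be hard to follow in flattened form. The sketch below distills just the type-unification step as a standalone helper (unifyComparisonTypes is a hypothetical name, not part of Hive): void (NULL) operands force a double/double comparison, boolean pairs are compared as int/int, identical types pass through, and anything else falls back to double/double.

import java.util.Arrays;
import java.util.List;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;

public class ComparisonTypeUnification {
  // Hypothetical helper distilled from ComparisonOpMethodResolver.getEvalMethod.
  static List<TypeInfo> unifyComparisonTypes(TypeInfo a, TypeInfo b) {
    if (a.equals(TypeInfoFactory.voidTypeInfo) || b.equals(TypeInfoFactory.voidTypeInfo)) {
      // A NULL operand: compare as double.
      return Arrays.asList(TypeInfoFactory.doubleTypeInfo, TypeInfoFactory.doubleTypeInfo);
    }
    if (a.equals(TypeInfoFactory.booleanTypeInfo) && b.equals(TypeInfoFactory.booleanTypeInfo)) {
      // Two booleans: compare as int.
      return Arrays.asList(TypeInfoFactory.intTypeInfo, TypeInfoFactory.intTypeInfo);
    }
    if (a == b) {
      // Same primitive TypeInfo singleton: no conversion needed.
      return Arrays.asList(a, b);
    }
    // Mixed types: fall back to double.
    return Arrays.asList(TypeInfoFactory.doubleTypeInfo, TypeInfoFactory.doubleTypeInfo);
  }

  public static void main(String[] args) {
    System.out.println(unifyComparisonTypes(
        TypeInfoFactory.booleanTypeInfo, TypeInfoFactory.booleanTypeInfo)); // [int, int]
  }
}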
Use of org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory.booleanTypeInfo in project hive by apache.
The class TestVectorBetweenIn, method doBetweenInVariation.
private boolean doBetweenInVariation(Random random, String typeName, boolean tryDecimal64,
    BetweenInVariation betweenInVariation, int subVariation) throws Exception {
  TypeInfo typeInfo = TypeInfoUtils.getTypeInfoFromTypeString(typeName);
  boolean isDecimal64 = checkDecimal64(tryDecimal64, typeInfo);
  DataTypePhysicalVariation dataTypePhysicalVariation =
      (isDecimal64 ? DataTypePhysicalVariation.DECIMAL_64 : DataTypePhysicalVariation.NONE);
  final int decimal64Scale = (isDecimal64 ? ((DecimalTypeInfo) typeInfo).getScale() : 0);

  // ----------------------------------------------------------------------------------------------

  ObjectInspector objectInspector =
      TypeInfoUtils.getStandardWritableObjectInspectorFromTypeInfo(typeInfo);
  final int valueCount = 10 + random.nextInt(10);
  List<Object> valueList = new ArrayList<Object>(valueCount);
  for (int i = 0; i < valueCount; i++) {
    valueList.add(VectorRandomRowSource.randomWritable(random, typeInfo, objectInspector,
        dataTypePhysicalVariation, /* allowNull */ false));
  }
  final boolean isBetween =
      (betweenInVariation == BetweenInVariation.FILTER_BETWEEN ||
       betweenInVariation == BetweenInVariation.FILTER_NOT_BETWEEN ||
       betweenInVariation == BetweenInVariation.PROJECTION_BETWEEN ||
       betweenInVariation == BetweenInVariation.PROJECTION_NOT_BETWEEN);
  List<Object> compareList = new ArrayList<Object>();
  List<Object> sortedList = new ArrayList<Object>(valueCount);
  sortedList.addAll(valueList);
  Object exampleObject = valueList.get(0);
  WritableComparator writableComparator =
      WritableComparator.get((Class<? extends WritableComparable>) exampleObject.getClass());
  sortedList.sort(writableComparator);
  final boolean isInvert;
  if (isBetween) {
    // FILTER_BETWEEN, FILTER_NOT_BETWEEN, PROJECTION_BETWEEN, PROJECTION_NOT_BETWEEN.
    isInvert = (betweenInVariation == BetweenInVariation.FILTER_NOT_BETWEEN ||
                betweenInVariation == BetweenInVariation.PROJECTION_NOT_BETWEEN);
    switch (subVariation) {
    case 0:
      // Range covers all values exactly.
      compareList.add(sortedList.get(0));
      compareList.add(sortedList.get(valueCount - 1));
      break;
    case 1:
      // Exclude the first and last sorted.
      compareList.add(sortedList.get(1));
      compareList.add(sortedList.get(valueCount - 2));
      break;
    case 2:
      // Only last 2 sorted.
      compareList.add(sortedList.get(valueCount - 2));
      compareList.add(sortedList.get(valueCount - 1));
      break;
    case 3:
    case 4:
    case 5:
    case 6:
      {
        // Choose 2 adjacent in the middle.
        Object min = sortedList.get(5);
        Object max = sortedList.get(6);
        compareList.add(min);
        compareList.add(max);
        if (subVariation == 4) {
          removeValue(valueList, min);
        } else if (subVariation == 5) {
          removeValue(valueList, max);
        } else if (subVariation == 6) {
          removeValue(valueList, min);
          removeValue(valueList, max);
        }
      }
      break;
    default:
      return false;
    }
  } else {
    // FILTER_IN, PROJECTION_IN.
    isInvert = false;
    switch (subVariation) {
    case 0:
      // All values.
      compareList.addAll(valueList);
      break;
    case 1:
      // Don't include the first and last sorted.
      for (int i = 1; i < valueCount - 1; i++) {
        compareList.add(valueList.get(i));
      }
      break;
    case 2:
      // The even ones.
      for (int i = 2; i < valueCount; i += 2) {
        compareList.add(valueList.get(i));
      }
      break;
    case 3:
      {
        // Choose 2 adjacent in the middle.
        Object min = sortedList.get(5);
        Object max = sortedList.get(6);
        compareList.add(min);
        compareList.add(max);
        // Note: the checks below are unreachable in this switch (this case only
        // runs when subVariation == 3).
        if (subVariation == 4) {
          removeValue(valueList, min);
        } else if (subVariation == 5) {
          removeValue(valueList, max);
        } else if (subVariation == 6) {
          removeValue(valueList, min);
          removeValue(valueList, max);
        }
      }
      break;
    default:
      return false;
    }
  }

  // ----------------------------------------------------------------------------------------------

  GenerationSpec generationSpec = GenerationSpec.createValueList(typeInfo, valueList);
  List<GenerationSpec> generationSpecList = new ArrayList<GenerationSpec>();
  List<DataTypePhysicalVariation> explicitDataTypePhysicalVariationList =
      new ArrayList<DataTypePhysicalVariation>();
  generationSpecList.add(generationSpec);
  explicitDataTypePhysicalVariationList.add(dataTypePhysicalVariation);
  VectorRandomRowSource rowSource = new VectorRandomRowSource();
  rowSource.initGenerationSpecSchema(random, generationSpecList,
      /* maxComplexDepth */ 0, /* allowNull */ true, /* isUnicodeOk */ true,
      explicitDataTypePhysicalVariationList);
  List<String> columns = new ArrayList<String>();
  String col1Name = rowSource.columnNames().get(0);
  columns.add(col1Name);
  final ExprNodeDesc col1Expr = new ExprNodeColumnDesc(typeInfo, col1Name, "table", false);
  List<ExprNodeDesc> children = new ArrayList<ExprNodeDesc>();
  if (isBetween) {
    children.add(new ExprNodeConstantDesc(Boolean.valueOf(isInvert)));
  }
  children.add(col1Expr);
  for (Object compareObject : compareList) {
    ExprNodeConstantDesc constDesc = new ExprNodeConstantDesc(typeInfo,
        VectorRandomRowSource.getNonWritableObject(compareObject, typeInfo, objectInspector));
    children.add(constDesc);
  }
  String[] columnNames = columns.toArray(new String[0]);
  Object[][] randomRows = rowSource.randomRows(100000);
  VectorRandomBatchSource batchSource =
      VectorRandomBatchSource.createInterestingBatches(random, rowSource, randomRows, null);
  final GenericUDF udf;
  final ObjectInspector outputObjectInspector;
  if (isBetween) {
    udf = new GenericUDFBetween();
    // First argument is the boolean invert. Arguments 1..3 are the inspectors for the range limits.
    ObjectInspector[] argumentOIs = new ObjectInspector[4];
    argumentOIs[0] = PrimitiveObjectInspectorFactory.writableBooleanObjectInspector;
    argumentOIs[1] = objectInspector;
    argumentOIs[2] = objectInspector;
    argumentOIs[3] = objectInspector;
    outputObjectInspector = udf.initialize(argumentOIs);
  } else {
    final int compareCount = compareList.size();
    udf = new GenericUDFIn();
    ObjectInspector[] argumentOIs = new ObjectInspector[compareCount];
    ConstantObjectInspector constantObjectInspector =
        (ConstantObjectInspector) children.get(1).getWritableObjectInspector();
    for (int i = 0; i < compareCount; i++) {
      argumentOIs[i] = constantObjectInspector;
    }
    outputObjectInspector = udf.initialize(argumentOIs);
  }
  TypeInfo outputTypeInfo = TypeInfoUtils.getTypeInfoFromObjectInspector(outputObjectInspector);
  ExprNodeGenericFuncDesc exprDesc =
      new ExprNodeGenericFuncDesc(TypeInfoFactory.booleanTypeInfo, udf, children);
  return executeTestModesAndVerify(typeInfo, betweenInVariation, compareList, columns, columnNames,
      children, udf, exprDesc, randomRows, rowSource, batchSource, outputTypeInfo,
      /* skipAdaptor */ false);
}
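For the BETWEEN variations, the sub-variation number decides which slice of the sorted values becomes the [min, max] range. A standalone illustration with plain integers standing in for the Hive writables (BetweenBoundsDemo is a made-up name):

import java.util.Arrays;
import java.util.List;

public class BetweenBoundsDemo {
  public static void main(String[] args) {
    // Stand-in for the sorted writables in doBetweenInVariation (valueCount = 10 here).
    List<Integer> sorted = Arrays.asList(1, 3, 5, 7, 9, 11, 13, 15, 17, 19);
    int n = sorted.size();
    System.out.println("subVariation 0 (covers all values): " + sorted.get(0) + " .. " + sorted.get(n - 1));
    System.out.println("subVariation 1 (excludes extremes): " + sorted.get(1) + " .. " + sorted.get(n - 2));
    System.out.println("subVariation 2 (only last two):     " + sorted.get(n - 2) + " .. " + sorted.get(n - 1));
    System.out.println("subVariation 3-6 (adjacent middle): " + sorted.get(5) + " .. " + sorted.get(6));
  }
}

Sub-variations 4 through 6 use the same middle range but additionally remove the min and/or max from the generated values, so the test also exercises rows with no value exactly on a range boundary.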
Use of org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory.booleanTypeInfo in project hive by apache.
The class TestVectorBetweenIn, method doBetweenStructInVariation.
private boolean doBetweenStructInVariation(Random random, String structTypeName,
    BetweenInVariation betweenInVariation) throws Exception {
  StructTypeInfo structTypeInfo =
      (StructTypeInfo) TypeInfoUtils.getTypeInfoFromTypeString(structTypeName);
  ObjectInspector structObjectInspector =
      TypeInfoUtils.getStandardWritableObjectInspectorFromTypeInfo(structTypeInfo);
  final int valueCount = 10 + random.nextInt(10);
  List<Object> valueList = new ArrayList<Object>(valueCount);
  for (int i = 0; i < valueCount; i++) {
    valueList.add(VectorRandomRowSource.randomWritable(random, structTypeInfo,
        structObjectInspector, DataTypePhysicalVariation.NONE, /* allowNull */ false));
  }
  final boolean isInvert = false;
  // No convenient WritableComparator / WritableComparable available for STRUCT.
  List<Object> compareList = new ArrayList<Object>();
  Set<Integer> includedSet = new HashSet<Integer>();
  final int chooseLimit = 4 + random.nextInt(valueCount / 2);
  int chooseCount = 0;
  while (chooseCount < chooseLimit) {
    final int index = random.nextInt(valueCount);
    if (includedSet.contains(index)) {
      continue;
    }
    includedSet.add(index);
    compareList.add(valueList.get(index));
    chooseCount++;
  }

  // ----------------------------------------------------------------------------------------------

  GenerationSpec structGenerationSpec = GenerationSpec.createValueList(structTypeInfo, valueList);
  List<GenerationSpec> structGenerationSpecList = new ArrayList<GenerationSpec>();
  List<DataTypePhysicalVariation> structExplicitDataTypePhysicalVariationList =
      new ArrayList<DataTypePhysicalVariation>();
  structGenerationSpecList.add(structGenerationSpec);
  structExplicitDataTypePhysicalVariationList.add(DataTypePhysicalVariation.NONE);
  VectorRandomRowSource structRowSource = new VectorRandomRowSource();
  structRowSource.initGenerationSpecSchema(random, structGenerationSpecList,
      /* maxComplexDepth */ 0, /* allowNull */ true, /* isUnicodeOk */ true,
      structExplicitDataTypePhysicalVariationList);
  Object[][] structRandomRows = structRowSource.randomRows(100000);

  // ----------------------------------------------------------------------------------------------

  List<GenerationSpec> generationSpecList = new ArrayList<GenerationSpec>();
  List<DataTypePhysicalVariation> explicitDataTypePhysicalVariationList =
      new ArrayList<DataTypePhysicalVariation>();
  List<TypeInfo> fieldTypeInfoList = structTypeInfo.getAllStructFieldTypeInfos();
  final int fieldCount = fieldTypeInfoList.size();
  for (int i = 0; i < fieldCount; i++) {
    GenerationSpec generationSpec = GenerationSpec.createOmitGeneration(fieldTypeInfoList.get(i));
    generationSpecList.add(generationSpec);
    explicitDataTypePhysicalVariationList.add(DataTypePhysicalVariation.NONE);
  }
  VectorRandomRowSource rowSource = new VectorRandomRowSource();
  rowSource.initGenerationSpecSchema(random, generationSpecList,
      /* maxComplexDepth */ 0, /* allowNull */ true, /* isUnicodeOk */ true,
      explicitDataTypePhysicalVariationList);
  Object[][] randomRows = rowSource.randomRows(100000);
  final int rowCount = randomRows.length;
  for (int r = 0; r < rowCount; r++) {
    List<Object> fieldValueList = (ArrayList) structRandomRows[r][0];
    for (int f = 0; f < fieldCount; f++) {
      randomRows[r][f] = fieldValueList.get(f);
    }
  }

  // ----------------------------------------------------------------------------------------------

  // Currently, STRUCT IN vectorization assumes a GenericUDFStruct.
  List<ObjectInspector> structUdfObjectInspectorList = new ArrayList<ObjectInspector>();
  List<ExprNodeDesc> structUdfChildren = new ArrayList<ExprNodeDesc>(fieldCount);
  List<String> rowColumnNameList = rowSource.columnNames();
  for (int i = 0; i < fieldCount; i++) {
    TypeInfo fieldTypeInfo = fieldTypeInfoList.get(i);
    ExprNodeColumnDesc fieldExpr =
        new ExprNodeColumnDesc(fieldTypeInfo, rowColumnNameList.get(i), "table", false);
    structUdfChildren.add(fieldExpr);
    ObjectInspector fieldObjectInspector =
        VectorRandomRowSource.getObjectInspector(fieldTypeInfo, DataTypePhysicalVariation.NONE);
    structUdfObjectInspectorList.add(fieldObjectInspector);
  }
  StandardStructObjectInspector structUdfObjectInspector =
      ObjectInspectorFactory.getStandardStructObjectInspector(rowColumnNameList,
          structUdfObjectInspectorList);
  String structUdfTypeName = structUdfObjectInspector.getTypeName();
  TypeInfo structUdfTypeInfo = TypeInfoUtils.getTypeInfoFromTypeString(structUdfTypeName);
  String structFuncText = "struct";
  FunctionInfo fi = FunctionRegistry.getFunctionInfo(structFuncText);
  GenericUDF genericUDF = fi.getGenericUDF();
  ExprNodeDesc col1Expr = new ExprNodeGenericFuncDesc(structUdfObjectInspector, genericUDF,
      structFuncText, structUdfChildren);

  // ----------------------------------------------------------------------------------------------

  List<String> columns = new ArrayList<String>();
  List<ExprNodeDesc> children = new ArrayList<ExprNodeDesc>();
  children.add(col1Expr);
  for (int i = 0; i < compareList.size(); i++) {
    Object compareObject = compareList.get(i);
    ExprNodeConstantDesc constDesc = new ExprNodeConstantDesc(structUdfTypeInfo,
        VectorRandomRowSource.getNonWritableObject(compareObject, structUdfTypeInfo,
            structUdfObjectInspector));
    children.add(constDesc);
  }
  for (int i = 0; i < fieldCount; i++) {
    columns.add(rowColumnNameList.get(i));
  }
  String[] columnNames = columns.toArray(new String[0]);
  VectorRandomBatchSource batchSource =
      VectorRandomBatchSource.createInterestingBatches(random, rowSource, randomRows, null);

  // ----------------------------------------------------------------------------------------------

  final GenericUDF udf = new GenericUDFIn();
  final int compareCount = compareList.size();
  ObjectInspector[] argumentOIs = new ObjectInspector[compareCount];
  for (int i = 0; i < compareCount; i++) {
    argumentOIs[i] = structUdfObjectInspector;
  }
  final ObjectInspector outputObjectInspector = udf.initialize(argumentOIs);
  TypeInfo outputTypeInfo = TypeInfoUtils.getTypeInfoFromObjectInspector(outputObjectInspector);
  ExprNodeGenericFuncDesc exprDesc =
      new ExprNodeGenericFuncDesc(TypeInfoFactory.booleanTypeInfo, udf, children);
  return executeTestModesAndVerify(structUdfTypeInfo, betweenInVariation, compareList, columns,
      columnNames, children, udf, exprDesc, randomRows, rowSource, batchSource, outputTypeInfo,
      /* skipAdaptor */ true);
}
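The struct variant leans on ObjectInspectorFactory.getStandardStructObjectInspector to derive a struct type name that getTypeInfoFromTypeString can parse back into a TypeInfo. A small sketch of that round trip, assuming hive-serde on the classpath (StructOiDemo and the field names are invented for the example):

import java.util.Arrays;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.StandardStructObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;

public class StructOiDemo {
  public static void main(String[] args) {
    // Build a struct<fname:string,age:int> object inspector from field names and inspectors.
    StandardStructObjectInspector structOi =
        ObjectInspectorFactory.getStandardStructObjectInspector(
            Arrays.asList("fname", "age"),
            Arrays.<ObjectInspector>asList(
                PrimitiveObjectInspectorFactory.writableStringObjectInspector,
                PrimitiveObjectInspectorFactory.writableIntObjectInspector));
    String typeName = structOi.getTypeName(); // e.g. "struct<fname:string,age:int>"
    // Parse the name back into a TypeInfo, as doBetweenStructInVariation does.
    TypeInfo structTypeInfo = TypeInfoUtils.getTypeInfoFromTypeString(typeName);
    System.out.println(typeName + " -> " + structTypeInfo.getCategory());
  }
}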
Use of org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory.booleanTypeInfo in project hive by apache.
The class SemiJoinReductionMerge, method createSemiJoinPredicate.
/**
 * Creates the multi-column semi-join predicate that is applied on the target relation.
 *
 * Assuming that the target columns of the semi-join are fname, lname, and age, the generated
 * predicate is:
 * <pre>
 * fname BETWEEN ?min_fname AND ?max_fname and
 * lname BETWEEN ?min_lname AND ?max_lname and
 * age BETWEEN ?min_age AND ?max_age and
 * IN_BLOOM_FILTER(HASH(fname,lname,age),?bloom_filter)
 * </pre>
 * where the question mark (?) indicates dynamic values bound at runtime.
 */
private static ExprNodeGenericFuncDesc createSemiJoinPredicate(List<ReduceSinkOperator> sjBranches,
    RuntimeValuesInfo sjValueInfo, ParseContext context) {
  // Performance note: to speed up evaluation, the BETWEEN predicates should come before IN_BLOOM_FILTER.
  Deque<String> dynamicIds = new ArrayDeque<>(sjValueInfo.getDynamicValueIDs());
  List<ExprNodeDesc> sjPredicates = new ArrayList<>();
  List<ExprNodeDesc> hashArgs = new ArrayList<>();
  for (ReduceSinkOperator rs : sjBranches) {
    RuntimeValuesInfo info = context.getRsToRuntimeValuesInfoMap().get(rs);
    checkState(info.getTargetColumns().size() == 1, "Cannot handle multi-column semijoin branches.");
    final ExprNodeDesc targetColumn = info.getTargetColumns().get(0);
    TypeInfo typeInfo = targetColumn.getTypeInfo();
    DynamicValue minDynamic = new DynamicValue(dynamicIds.poll(), typeInfo);
    DynamicValue maxDynamic = new DynamicValue(dynamicIds.poll(), typeInfo);
    List<ExprNodeDesc> betweenArgs = Arrays.asList(
        new ExprNodeConstantDesc(Boolean.FALSE), // false: do not invert the BETWEEN result
        targetColumn,
        new ExprNodeDynamicValueDesc(minDynamic),
        new ExprNodeDynamicValueDesc(maxDynamic));
    ExprNodeDesc betweenExp = new ExprNodeGenericFuncDesc(TypeInfoFactory.booleanTypeInfo,
        new GenericUDFBetween(), "between", betweenArgs);
    sjPredicates.add(betweenExp);
    hashArgs.add(targetColumn);
  }
  ExprNodeDesc hashExp = ExprNodeDescUtils.murmurHash(hashArgs);
  assert dynamicIds.size() == 1 : "There should be one column left untreated, the one with the bloom filter";
  DynamicValue bloomDynamic = new DynamicValue(dynamicIds.poll(), TypeInfoFactory.binaryTypeInfo);
  sjPredicates.add(new ExprNodeGenericFuncDesc(TypeInfoFactory.booleanTypeInfo,
      new GenericUDFInBloomFilter(), "in_bloom_filter",
      Arrays.asList(hashExp, new ExprNodeDynamicValueDesc(bloomDynamic))));
  return and(sjPredicates);
}
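To see the building blocks in isolation, the sketch below constructs a single non-inverted BETWEEN predicate with the same ExprNodeGenericFuncDesc constructor used above, but with literal bounds instead of DynamicValues. This is a minimal sketch assuming hive-exec on the classpath; the column name age, table alias t, and bounds 18/65 are invented for the example.

import java.util.Arrays;
import java.util.List;
import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFBetween;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;

public class BetweenPredicateSketch {
  public static void main(String[] args) {
    ExprNodeDesc ageCol = new ExprNodeColumnDesc(TypeInfoFactory.intTypeInfo, "age", "t", false);
    List<ExprNodeDesc> betweenArgs = Arrays.asList(
        new ExprNodeConstantDesc(Boolean.FALSE), // false: plain BETWEEN, not NOT BETWEEN
        ageCol,
        new ExprNodeConstantDesc(18),  // stands in for the ?min dynamic value
        new ExprNodeConstantDesc(65)); // stands in for the ?max dynamic value
    // BETWEEN predicates always produce a boolean, hence booleanTypeInfo as the output type.
    ExprNodeDesc between = new ExprNodeGenericFuncDesc(TypeInfoFactory.booleanTypeInfo,
        new GenericUDFBetween(), "between", betweenArgs);
    System.out.println(between.getExprString()); // roughly: age BETWEEN 18 AND 65
  }
}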