Use of org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqualOrLessThan in project hive by apache.
The class TestAccumuloPredicateHandler, method testCreateIteratorSettings.
@Test
public void testCreateIteratorSettings() throws Exception {
  // Override what's placed in the Configuration by setup()
  conf = new JobConf();
  List<String> columnNames = Arrays.asList("field1", "field2", "rid");
  List<TypeInfo> columnTypes = Arrays.<TypeInfo>asList(TypeInfoFactory.stringTypeInfo,
      TypeInfoFactory.intTypeInfo, TypeInfoFactory.stringTypeInfo);
  conf.set(serdeConstants.LIST_COLUMNS, Joiner.on(',').join(columnNames));
  conf.set(serdeConstants.LIST_COLUMN_TYPES, "string,int,string");
  conf.set(AccumuloSerDeParameters.DEFAULT_STORAGE_TYPE, ColumnEncoding.BINARY.getName());
  String columnMappingStr = "cf:f1,cf:f2,:rowID";
  conf.set(AccumuloSerDeParameters.COLUMN_MAPPINGS, columnMappingStr);
  columnMapper = new ColumnMapper(columnMappingStr, ColumnEncoding.STRING.getName(), columnNames, columnTypes);

  ExprNodeDesc column = new ExprNodeColumnDesc(TypeInfoFactory.stringTypeInfo, "field1", null, false);
  ExprNodeDesc constant = new ExprNodeConstantDesc(TypeInfoFactory.stringTypeInfo, "aaa");
  List<ExprNodeDesc> children = Lists.newArrayList();
  children.add(column);
  children.add(constant);
  ExprNodeDesc node = new ExprNodeGenericFuncDesc(TypeInfoFactory.stringTypeInfo,
      new GenericUDFOPEqualOrLessThan(), children);
  assertNotNull(node);

  ExprNodeDesc column2 = new ExprNodeColumnDesc(TypeInfoFactory.intTypeInfo, "field2", null, false);
  ExprNodeDesc constant2 = new ExprNodeConstantDesc(TypeInfoFactory.intTypeInfo, 5);
  List<ExprNodeDesc> children2 = Lists.newArrayList();
  children2.add(column2);
  children2.add(constant2);
  ExprNodeDesc node2 = new ExprNodeGenericFuncDesc(TypeInfoFactory.stringTypeInfo,
      new GenericUDFOPGreaterThan(), children2);
  assertNotNull(node2);

  List<ExprNodeDesc> bothFilters = Lists.newArrayList();
  bothFilters.add(node);
  bothFilters.add(node2);
  ExprNodeGenericFuncDesc both = new ExprNodeGenericFuncDesc(TypeInfoFactory.stringTypeInfo,
      new GenericUDFOPAnd(), bothFilters);
  String filterExpr = SerializationUtilities.serializeExpression(both);
  conf.set(TableScanDesc.FILTER_EXPR_CONF_STR, filterExpr);

  List<IteratorSetting> iterators = handler.getIterators(conf, columnMapper);
  assertEquals(2, iterators.size());
  IteratorSetting is1 = iterators.get(0);
  IteratorSetting is2 = iterators.get(1);

  boolean foundQual = false;
  boolean foundPCompare = false;
  boolean foundCOpt = false;
  boolean foundConst = false;
  for (Map.Entry<String, String> option : is1.getOptions().entrySet()) {
    String optKey = option.getKey();
    if (optKey.equals(PrimitiveComparisonFilter.COLUMN)) {
      foundQual = true;
      assertEquals("cf:f1", option.getValue());
    } else if (optKey.equals(PrimitiveComparisonFilter.CONST_VAL)) {
      foundConst = true;
      assertEquals(Base64.getEncoder().encodeToString("aaa".getBytes()), option.getValue());
    } else if (optKey.equals(PrimitiveComparisonFilter.COMPARE_OPT_CLASS)) {
      foundCOpt = true;
      assertEquals(LessThanOrEqual.class.getName(), option.getValue());
    } else if (optKey.equals(PrimitiveComparisonFilter.P_COMPARE_CLASS)) {
      foundPCompare = true;
      assertEquals(StringCompare.class.getName(), option.getValue());
    }
  }
  assertTrue(foundConst && foundCOpt && foundPCompare && foundQual);

  foundQual = false;
  foundPCompare = false;
  foundCOpt = false;
  foundConst = false;
  for (Map.Entry<String, String> option : is2.getOptions().entrySet()) {
    String optKey = option.getKey();
    if (optKey.equals(PrimitiveComparisonFilter.COLUMN)) {
      foundQual = true;
      assertEquals("cf:f2", option.getValue());
    } else if (optKey.equals(PrimitiveComparisonFilter.CONST_VAL)) {
      foundConst = true;
      byte[] intVal = new byte[4];
      ByteBuffer.wrap(intVal).putInt(5);
      assertEquals(Base64.getEncoder().encodeToString(intVal), option.getValue());
    } else if (optKey.equals(PrimitiveComparisonFilter.COMPARE_OPT_CLASS)) {
      foundCOpt = true;
      assertEquals(GreaterThan.class.getName(), option.getValue());
    } else if (optKey.equals(PrimitiveComparisonFilter.P_COMPARE_CLASS)) {
      foundPCompare = true;
      assertEquals(IntCompare.class.getName(), option.getValue());
    }
  }
  assertTrue(foundConst && foundCOpt && foundPCompare && foundQual);
}
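Each pushed-down leaf predicate becomes one IteratorSetting whose options fully describe the comparison, with the constant Base64-encoded in its binary form. As a minimal sketch, assuming only the option keys and encodings asserted above (the class and helper names here are hypothetical), a consumer could decode the pushed-down constants like this:

// Hedged sketch: decode CONST_VAL option values back into Java values, using only the JDK.
import java.nio.ByteBuffer;
import java.nio.charset.StandardCharsets;
import java.util.Base64;

public final class ConstValDecoder {
  // String constants are Base64 over their bytes ("aaa" in the test above).
  public static String decodeStringConst(String base64Val) {
    return new String(Base64.getDecoder().decode(base64Val), StandardCharsets.UTF_8);
  }

  // Int constants are Base64 over a 4-byte big-endian encoding (5 in the test above).
  public static int decodeIntConst(String base64Val) {
    return ByteBuffer.wrap(Base64.getDecoder().decode(base64Val)).getInt();
  }
}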
Use of org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqualOrLessThan in project hive by apache.
The class TestAccumuloPredicateHandler, method testDisjointRanges.
@Test
public void testDisjointRanges() throws SerDeException {
  ExprNodeDesc column = new ExprNodeColumnDesc(TypeInfoFactory.stringTypeInfo, "rid", null, false);
  ExprNodeDesc constant = new ExprNodeConstantDesc(TypeInfoFactory.stringTypeInfo, "aaa");
  List<ExprNodeDesc> children = Lists.newArrayList();
  children.add(column);
  children.add(constant);
  ExprNodeDesc node = new ExprNodeGenericFuncDesc(TypeInfoFactory.stringTypeInfo,
      new GenericUDFOPEqualOrLessThan(), children);
  assertNotNull(node);

  ExprNodeDesc column2 = new ExprNodeColumnDesc(TypeInfoFactory.stringTypeInfo, "rid", null, false);
  ExprNodeDesc constant2 = new ExprNodeConstantDesc(TypeInfoFactory.stringTypeInfo, "bbb");
  List<ExprNodeDesc> children2 = Lists.newArrayList();
  children2.add(column2);
  children2.add(constant2);
  ExprNodeDesc node2 = new ExprNodeGenericFuncDesc(TypeInfoFactory.stringTypeInfo,
      new GenericUDFOPGreaterThan(), children2);
  assertNotNull(node2);

  List<ExprNodeDesc> bothFilters = Lists.newArrayList();
  bothFilters.add(node);
  bothFilters.add(node2);
  ExprNodeGenericFuncDesc both = new ExprNodeGenericFuncDesc(TypeInfoFactory.stringTypeInfo,
      new GenericUDFOPAnd(), bothFilters);
  String filterExpr = SerializationUtilities.serializeExpression(both);
  conf.set(TableScanDesc.FILTER_EXPR_CONF_STR, filterExpr);
  Collection<Range> ranges = handler.getRanges(conf, columnMapper);
  // Impossible to get ranges for row <= 'aaa' and row > 'bbb'
  assertEquals(0, ranges.size());
}
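The empty result is plain interval intersection: (-inf, "aaa"] and ("bbb", +inf) do not overlap. A standalone illustration with Accumulo's Range API, independent of the predicate handler (the clip overload that returns null on disjoint ranges belongs to org.apache.accumulo.core.data.Range):

// Hedged sketch: show directly that the two row ranges from the test are disjoint.
import org.apache.accumulo.core.data.Range;

public class DisjointRangeDemo {
  public static void main(String[] args) {
    Range upToAaa = new Range(null, false, "aaa", true);   // row <= "aaa"  ->  (-inf, "aaa"]
    Range aboveBbb = new Range("bbb", false, null, false); // row >  "bbb"  ->  ("bbb", +inf)
    // clip(range, true) returns null instead of throwing when the ranges are disjoint.
    System.out.println(upToAaa.clip(aboveBbb, true)); // prints "null"
  }
}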
Use of org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqualOrLessThan in project hive by apache.
The class TestKuduPredicateHandler, method testOrPredicates.
@Test
public void testOrPredicates() throws Exception {
  for (ColumnSchema col : SCHEMA.getColumns()) {
    // Skip the "null" and "default" test columns, and skip binary columns because
    // binary predicates are not supported. (HIVE-11370)
    if (col.getName().equals("null") || col.getName().equals("default") || col.getName().equals("binary")) {
      continue;
    }
    PrimitiveTypeInfo typeInfo = toHiveType(col.getType(), col.getTypeAttributes());
    ExprNodeDesc colExpr = new ExprNodeColumnDesc(typeInfo, col.getName(), null, false);
    ExprNodeDesc constExpr = new ExprNodeConstantDesc(typeInfo, ROW.getObject(col.getName()));
    List<ExprNodeDesc> children = Lists.newArrayList();
    children.add(colExpr);
    children.add(constExpr);
    ExprNodeGenericFuncDesc gePredicateExpr =
        new ExprNodeGenericFuncDesc(typeInfo, new GenericUDFOPEqualOrGreaterThan(), children);
    ExprNodeGenericFuncDesc lePredicateExpr =
        new ExprNodeGenericFuncDesc(typeInfo, new GenericUDFOPEqualOrLessThan(), children);
    List<ExprNodeDesc> orChildren = Lists.newArrayList();
    orChildren.add(gePredicateExpr);
    orChildren.add(lePredicateExpr);
    ExprNodeGenericFuncDesc predicateExpr =
        new ExprNodeGenericFuncDesc(typeInfo, new GenericUDFOPOr(), orChildren);
    // Verify KuduPredicateHandler.decompose
    HiveStoragePredicateHandler.DecomposedPredicate decompose =
        KuduPredicateHandler.decompose(predicateExpr, SCHEMA);
    // OR predicates are currently not supported.
    assertNull(decompose);
    List<KuduPredicate> predicates = expressionToPredicates(predicateExpr);
    assertEquals(0, predicates.size());

    // Also test NOT OR.
    List<ExprNodeDesc> notChildren = Lists.newArrayList();
    notChildren.add(predicateExpr);
    ExprNodeGenericFuncDesc notPredicateExpr =
        new ExprNodeGenericFuncDesc(typeInfo, new GenericUDFOPNot(), notChildren);
    // Verify KuduPredicateHandler.decompose
    HiveStoragePredicateHandler.DecomposedPredicate decomposeNot =
        KuduPredicateHandler.decompose(notPredicateExpr, SCHEMA);
    // See note in KuduPredicateHandler.newAnalyzer.
    assertNull(decomposeNot);
    List<KuduPredicate> notPredicates = expressionToPredicates(notPredicateExpr);
    assertEquals(2, notPredicates.size());
  }
}
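Why two predicates in the NOT case: by De Morgan's law, NOT(col >= v OR col <= v) rewrites to (col < v AND col > v), and each conjunct converts to one KuduPredicate. A hedged, self-contained sketch of the equivalent direct construction (the "key" column schema is hypothetical; the Kudu client calls are standard API):

// Hedged sketch: build the two predicates the NOT OR case decomposes into.
import org.apache.kudu.ColumnSchema;
import org.apache.kudu.Type;
import org.apache.kudu.client.KuduPredicate;
import org.apache.kudu.client.KuduPredicate.ComparisonOp;

public class NotOrDemo {
  public static void main(String[] args) {
    // NOT(key >= 5 OR key <= 5)  ==  key < 5 AND key > 5: unsatisfiable, yet two predicates.
    ColumnSchema keyCol = new ColumnSchema.ColumnSchemaBuilder("key", Type.INT32).key(true).build();
    KuduPredicate less = KuduPredicate.newComparisonPredicate(keyCol, ComparisonOp.LESS, 5);
    KuduPredicate greater = KuduPredicate.newComparisonPredicate(keyCol, ComparisonOp.GREATER, 5);
    System.out.println(less + " AND " + greater);
  }
}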
Use of org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqualOrLessThan in project hive by apache.
The class TestVectorFilterCompare, method doTestsWithDiffColumnScalar.
private void doTestsWithDiffColumnScalar(Random random, TypeInfo typeInfo1, TypeInfo typeInfo2,
    ColumnScalarMode columnScalarMode, Comparison comparison, boolean tryDecimal64) throws Exception {
  String typeName1 = typeInfo1.getTypeName();
  PrimitiveCategory primitiveCategory1 = ((PrimitiveTypeInfo) typeInfo1).getPrimitiveCategory();
  String typeName2 = typeInfo2.getTypeName();
  PrimitiveCategory primitiveCategory2 = ((PrimitiveTypeInfo) typeInfo2).getPrimitiveCategory();
  List<GenerationSpec> generationSpecList = new ArrayList<GenerationSpec>();
  List<DataTypePhysicalVariation> explicitDataTypePhysicalVariationList =
      new ArrayList<DataTypePhysicalVariation>();
  List<String> columns = new ArrayList<String>();
  int columnNum = 1;

  ExprNodeDesc col1Expr;
  Object scalar1Object = null;
  final boolean decimal64Enable1 = checkDecimal64(tryDecimal64, typeInfo1);
  if (columnScalarMode == ColumnScalarMode.COLUMN_COLUMN
      || columnScalarMode == ColumnScalarMode.COLUMN_SCALAR) {
    generationSpecList.add(GenerationSpec.createSameType(typeInfo1));
    explicitDataTypePhysicalVariationList.add(
        decimal64Enable1 ? DataTypePhysicalVariation.DECIMAL_64 : DataTypePhysicalVariation.NONE);
    String columnName = "col" + (columnNum++);
    col1Expr = new ExprNodeColumnDesc(typeInfo1, columnName, "table", false);
    columns.add(columnName);
  } else {
    scalar1Object = VectorRandomRowSource.randomPrimitiveObject(random, (PrimitiveTypeInfo) typeInfo1);
    // Adjust the decimal type to the scalar's type...
    if (typeInfo1 instanceof DecimalTypeInfo) {
      typeInfo1 = getDecimalScalarTypeInfo(scalar1Object);
    }
    col1Expr = new ExprNodeConstantDesc(typeInfo1, scalar1Object);
  }
  ExprNodeDesc col2Expr;
  Object scalar2Object = null;
  final boolean decimal64Enable2 = checkDecimal64(tryDecimal64, typeInfo2);
  if (columnScalarMode == ColumnScalarMode.COLUMN_COLUMN
      || columnScalarMode == ColumnScalarMode.SCALAR_COLUMN) {
    generationSpecList.add(GenerationSpec.createSameType(typeInfo2));
    explicitDataTypePhysicalVariationList.add(
        decimal64Enable2 ? DataTypePhysicalVariation.DECIMAL_64 : DataTypePhysicalVariation.NONE);
    String columnName = "col" + (columnNum++);
    col2Expr = new ExprNodeColumnDesc(typeInfo2, columnName, "table", false);
    columns.add(columnName);
  } else {
    scalar2Object = VectorRandomRowSource.randomPrimitiveObject(random, (PrimitiveTypeInfo) typeInfo2);
    // Adjust the decimal type to the scalar's type...
    if (typeInfo2 instanceof DecimalTypeInfo) {
      typeInfo2 = getDecimalScalarTypeInfo(scalar2Object);
    }
    col2Expr = new ExprNodeConstantDesc(typeInfo2, scalar2Object);
  }
  List<ObjectInspector> objectInspectorList = new ArrayList<ObjectInspector>();
  objectInspectorList.add(VectorRandomRowSource.getObjectInspector(typeInfo1));
  objectInspectorList.add(VectorRandomRowSource.getObjectInspector(typeInfo2));
  List<ExprNodeDesc> children = new ArrayList<ExprNodeDesc>();
  children.add(col1Expr);
  children.add(col2Expr);

  // ----------------------------------------------------------------------------------------------

  String[] columnNames = columns.toArray(new String[0]);
  VectorRandomRowSource rowSource = new VectorRandomRowSource();
  rowSource.initGenerationSpecSchema(random, generationSpecList, /* maxComplexDepth */ 0,
      /* allowNull */ true, /* isUnicodeOk */ true, explicitDataTypePhysicalVariationList);
  Object[][] randomRows = rowSource.randomRows(100000);
  VectorRandomBatchSource batchSource =
      VectorRandomBatchSource.createInterestingBatches(random, rowSource, randomRows, null);
  GenericUDF genericUdf;
  switch (comparison) {
    case EQUALS:
      genericUdf = new GenericUDFOPEqual();
      break;
    case LESS_THAN:
      genericUdf = new GenericUDFOPLessThan();
      break;
    case LESS_THAN_EQUAL:
      genericUdf = new GenericUDFOPEqualOrLessThan();
      break;
    case GREATER_THAN:
      genericUdf = new GenericUDFOPGreaterThan();
      break;
    case GREATER_THAN_EQUAL:
      genericUdf = new GenericUDFOPEqualOrGreaterThan();
      break;
    case NOT_EQUALS:
      genericUdf = new GenericUDFOPNotEqual();
      break;
    default:
throw new RuntimeException("Unexpected arithmetic " + comparison);
  }
  ObjectInspector[] objectInspectors =
      objectInspectorList.toArray(new ObjectInspector[objectInspectorList.size()]);
  ObjectInspector outputObjectInspector = null;
  try {
    outputObjectInspector = genericUdf.initialize(objectInspectors);
  } catch (Exception e) {
    Assert.fail(e.toString());
  }
  TypeInfo outputTypeInfo = TypeInfoUtils.getTypeInfoFromObjectInspector(outputObjectInspector);
  ExprNodeGenericFuncDesc exprDesc = new ExprNodeGenericFuncDesc(outputTypeInfo, genericUdf, children);
  final int rowCount = randomRows.length;
  Object[][] resultObjectsArray = new Object[FilterCompareTestMode.count][];
  for (int i = 0; i < FilterCompareTestMode.count; i++) {
    Object[] resultObjects = new Object[rowCount];
    resultObjectsArray[i] = resultObjects;
    FilterCompareTestMode filterCompareTestMode = FilterCompareTestMode.values()[i];
    switch (filterCompareTestMode) {
      case ROW_MODE:
        doRowFilterCompareTest(typeInfo1, typeInfo2, columns, children, exprDesc, comparison,
            randomRows, columnScalarMode, rowSource.rowStructObjectInspector(), outputTypeInfo,
            resultObjects);
        break;
      case ADAPTOR:
      case FILTER_VECTOR_EXPRESSION:
      case COMPARE_VECTOR_EXPRESSION:
        doVectorFilterCompareTest(typeInfo1, typeInfo2, columns, columnNames, rowSource.typeInfos(),
            rowSource.dataTypePhysicalVariations(), children, exprDesc, comparison,
            filterCompareTestMode, columnScalarMode, batchSource,
            exprDesc.getWritableObjectInspector(), outputTypeInfo, resultObjects);
        break;
      default:
throw new RuntimeException("Unexpected IF statement test mode " + filterCompareTestMode);
    }
  }
  for (int i = 0; i < rowCount; i++) {
    // Row-mode is the expected value.
    Object expectedResult = resultObjectsArray[0][i];
    for (int v = 1; v < FilterCompareTestMode.count; v++) {
      FilterCompareTestMode filterCompareTestMode = FilterCompareTestMode.values()[v];
      Object vectorResult = resultObjectsArray[v][i];
      if (filterCompareTestMode == FilterCompareTestMode.FILTER_VECTOR_EXPRESSION
          && expectedResult == null && vectorResult != null) {
        // This is OK.
        boolean vectorBoolean = ((BooleanWritable) vectorResult).get();
        if (vectorBoolean) {
          Assert.fail("Row " + i + " typeName1 " + typeName1 + " typeName2 " + typeName2
              + " outputTypeName " + outputTypeInfo.getTypeName() + " " + comparison + " "
              + filterCompareTestMode + " " + columnScalarMode
              + " result is NOT NULL and true"
              + " does not match row-mode expected result is NULL which means false here"
              + (columnScalarMode == ColumnScalarMode.SCALAR_COLUMN
                  ? " scalar1 " + scalar1Object.toString() : "")
              + " row values " + Arrays.toString(randomRows[i])
              + (columnScalarMode == ColumnScalarMode.COLUMN_SCALAR
                  ? " scalar2 " + scalar2Object.toString() : ""));
        }
      } else if (expectedResult == null || vectorResult == null) {
        if (expectedResult != null || vectorResult != null) {
          Assert.fail("Row " + i + " typeName1 " + typeName1 + " typeName2 " + typeName2
              + " outputTypeName " + outputTypeInfo.getTypeName() + " " + comparison + " "
              + filterCompareTestMode + " " + columnScalarMode
              + " result is NULL " + (vectorResult == null)
              + " does not match row-mode expected result is NULL " + (expectedResult == null)
              + (columnScalarMode == ColumnScalarMode.SCALAR_COLUMN
                  ? " scalar1 " + scalar1Object.toString() : "")
              + " row values " + Arrays.toString(randomRows[i])
              + (columnScalarMode == ColumnScalarMode.COLUMN_SCALAR
                  ? " scalar2 " + scalar2Object.toString() : ""));
        }
      } else {
        if (!expectedResult.equals(vectorResult)) {
          Assert.fail("Row " + i + " typeName1 " + typeName1 + " typeName2 " + typeName2
              + " outputTypeName " + outputTypeInfo.getTypeName() + " " + comparison + " "
              + filterCompareTestMode + " " + columnScalarMode
              + " result " + vectorResult.toString()
              + " (" + vectorResult.getClass().getSimpleName() + ")"
              + " does not match row-mode expected result " + expectedResult.toString()
              + " (" + expectedResult.getClass().getSimpleName() + ")"
              + (columnScalarMode == ColumnScalarMode.SCALAR_COLUMN
                  ? " scalar1 " + scalar1Object.toString() : "")
              + " row values " + Arrays.toString(randomRows[i])
              + (columnScalarMode == ColumnScalarMode.COLUMN_SCALAR
                  ? " scalar2 " + scalar2Object.toString() : ""));
        }
      }
    }
  }
}
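A hypothetical call site for the helper above, inside the same test class (the seed and type choices are illustrative; Comparison and ColumnScalarMode are the test class's own enums referenced in the body):

// Hedged sketch: exercise a BIGINT column against a BIGINT scalar with <=.
Random random = new Random(12345);
doTestsWithDiffColumnScalar(random, TypeInfoFactory.longTypeInfo, TypeInfoFactory.longTypeInfo,
    ColumnScalarMode.COLUMN_SCALAR, Comparison.LESS_THAN_EQUAL, /* tryDecimal64 */ false);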
Use of org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqualOrLessThan in project hive by apache.
The class TestOrcSplitElimination, method testSplitEliminationLargeMaxSplit.
@Test
public void testSplitEliminationLargeMaxSplit() throws Exception {
  ObjectInspector inspector = createIO();
  Writer writer = OrcFile.createWriter(fs, testFilePath, conf, inspector, 100000,
      CompressionKind.NONE, 10000, 10000);
  writeData(writer);
  writer.close();
  HiveConf.setLongVar(conf, HiveConf.ConfVars.MAPREDMINSPLITSIZE, 1000);
  HiveConf.setLongVar(conf, HiveConf.ConfVars.MAPREDMAXSPLITSIZE, 150000);
  InputFormat<?, ?> in = new OrcInputFormat();
  FileInputFormat.setInputPaths(conf, testFilePath.toString());
  GenericUDF udf = new GenericUDFOPEqualOrLessThan();
  List<ExprNodeDesc> childExpr = Lists.newArrayList();
  ExprNodeConstantDesc con;
  ExprNodeGenericFuncDesc en;
  String sargStr;
  createTestSarg(inspector, udf, childExpr);
  InputSplit[] splits = in.getSplits(conf, 1);
  assertEquals(2, splits.length);

  con = new ExprNodeConstantDesc(0);
  childExpr.set(1, con);
  en = new ExprNodeGenericFuncDesc(inspector, udf, childExpr);
  sargStr = SerializationUtilities.serializeExpression(en);
  conf.set("hive.io.filter.expr.serialized", sargStr);
  splits = in.getSplits(conf, 1);
  // No stripe satisfies the condition.
  assertEquals(0, splits.length);

  con = new ExprNodeConstantDesc(2);
  childExpr.set(1, con);
  en = new ExprNodeGenericFuncDesc(inspector, udf, childExpr);
  sargStr = SerializationUtilities.serializeExpression(en);
  conf.set("hive.io.filter.expr.serialized", sargStr);
  splits = in.getSplits(conf, 1);
  // Only the first stripe satisfies the condition, hence a single split.
  assertEquals(1, splits.length);

  con = new ExprNodeConstantDesc(5);
  childExpr.set(1, con);
  en = new ExprNodeGenericFuncDesc(inspector, udf, childExpr);
  sargStr = SerializationUtilities.serializeExpression(en);
  conf.set("hive.io.filter.expr.serialized", sargStr);
  splits = in.getSplits(conf, 1);
  // The first stripe satisfies the predicate and becomes one split; the last stripe
  // becomes a separate split.
  assertEquals(2, splits.length);

  con = new ExprNodeConstantDesc(13);
  childExpr.set(1, con);
  en = new ExprNodeGenericFuncDesc(inspector, udf, childExpr);
  sargStr = SerializationUtilities.serializeExpression(en);
  conf.set("hive.io.filter.expr.serialized", sargStr);
  splits = in.getSplits(conf, 1);
  // The first two stripes satisfy the predicate and are merged into a single split;
  // the last stripe becomes a separate split.
  assertEquals(2, splits.length);

  con = new ExprNodeConstantDesc(29);
  childExpr.set(1, con);
  en = new ExprNodeGenericFuncDesc(inspector, udf, childExpr);
  sargStr = SerializationUtilities.serializeExpression(en);
  conf.set("hive.io.filter.expr.serialized", sargStr);
  splits = in.getSplits(conf, 1);
  // The first three stripes satisfy the predicate and are merged into a single split;
  // the last stripe becomes a separate split.
  assertEquals(2, splits.length);

  con = new ExprNodeConstantDesc(70);
  childExpr.set(1, con);
  en = new ExprNodeGenericFuncDesc(inspector, udf, childExpr);
  sargStr = SerializationUtilities.serializeExpression(en);
  conf.set("hive.io.filter.expr.serialized", sargStr);
  splits = in.getSplits(conf, 1);
  // The first two stripes satisfy the predicate and are merged into a single split;
  // the last two stripes form a separate split.
  assertEquals(2, splits.length);
}
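Each probe above repeats the same serialize-then-split cycle; a hedged refactoring sketch of that pattern follows (the helper name is hypothetical; the config key and constructor calls are taken from the test body, and the conf field is assumed to be the same one the test uses):

// Hedged sketch: one "<= threshold" probe against the sarg column, returning the split count.
private int splitCountForThreshold(InputFormat<?, ?> in, ObjectInspector inspector,
    GenericUDF udf, List<ExprNodeDesc> childExpr, int threshold) throws IOException {
  childExpr.set(1, new ExprNodeConstantDesc(threshold));
  ExprNodeGenericFuncDesc en = new ExprNodeGenericFuncDesc(inspector, udf, childExpr);
  conf.set("hive.io.filter.expr.serialized", SerializationUtilities.serializeExpression(en));
  return in.getSplits(conf, 1).length;
}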