Usage example of org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqualOrLessThan in the Apache Hive project.
From the class TestAccumuloPredicateHandler, method testCreateIteratorSettings.
@Test
public void testCreateIteratorSettings() throws Exception {
  // Override what's placed in the Configuration by setup()
  conf = new JobConf();
  List<String> columnNames = Arrays.asList("field1", "field2", "rid");
  List<TypeInfo> columnTypes = Arrays.<TypeInfo>asList(TypeInfoFactory.stringTypeInfo,
      TypeInfoFactory.intTypeInfo, TypeInfoFactory.stringTypeInfo);
  conf.set(serdeConstants.LIST_COLUMNS, Joiner.on(',').join(columnNames));
  conf.set(serdeConstants.LIST_COLUMN_TYPES, "string,int,string");
  String columnMappingStr = "cf:f1,cf:f2,:rowID";
  conf.set(AccumuloSerDeParameters.COLUMN_MAPPINGS, columnMappingStr);
  columnMapper = new ColumnMapper(columnMappingStr, ColumnEncoding.STRING.getName(),
      columnNames, columnTypes);

  // Leaf predicate: field1 <= 'aaa'
  ExprNodeDesc column = new ExprNodeColumnDesc(TypeInfoFactory.stringTypeInfo, "field1", null, false);
  ExprNodeDesc constant = new ExprNodeConstantDesc(TypeInfoFactory.stringTypeInfo, "aaa");
  List<ExprNodeDesc> children = Lists.newArrayList();
  children.add(column);
  children.add(constant);
  ExprNodeDesc node = new ExprNodeGenericFuncDesc(TypeInfoFactory.stringTypeInfo,
      new GenericUDFOPEqualOrLessThan(), children);
  assertNotNull(node);

  // Leaf predicate: field2 > 5
  ExprNodeDesc column2 = new ExprNodeColumnDesc(TypeInfoFactory.intTypeInfo, "field2", null, false);
  ExprNodeDesc constant2 = new ExprNodeConstantDesc(TypeInfoFactory.intTypeInfo, 5);
  List<ExprNodeDesc> children2 = Lists.newArrayList();
  children2.add(column2);
  children2.add(constant2);
  ExprNodeDesc node2 = new ExprNodeGenericFuncDesc(TypeInfoFactory.stringTypeInfo,
      new GenericUDFOPGreaterThan(), children2);
  assertNotNull(node2);

  // AND the two predicates together and push them into the job configuration
  List<ExprNodeDesc> bothFilters = Lists.newArrayList();
  bothFilters.add(node);
  bothFilters.add(node2);
  ExprNodeGenericFuncDesc both = new ExprNodeGenericFuncDesc(TypeInfoFactory.stringTypeInfo,
      new GenericUDFOPAnd(), bothFilters);
  String filterExpr = SerializationUtilities.serializeExpression(both);
  conf.set(TableScanDesc.FILTER_EXPR_CONF_STR, filterExpr);

  // One IteratorSetting per leaf predicate, in predicate order
  List<IteratorSetting> iterators = handler.getIterators(conf, columnMapper);
  assertEquals(2, iterators.size());

  // field2's constant is serialized as a big-endian 4-byte int before Base64 encoding
  byte[] intVal = new byte[4];
  ByteBuffer.wrap(intVal).putInt(5);

  assertIteratorOptions(iterators.get(0), "cf:f1",
      new String(Base64.encodeBase64("aaa".getBytes())),
      LessThanOrEqual.class.getName(), StringCompare.class.getName());
  assertIteratorOptions(iterators.get(1), "cf:f2",
      new String(Base64.encodeBase64(intVal)),
      GreaterThan.class.getName(), IntCompare.class.getName());
}

/**
 * Asserts that the given IteratorSetting carries exactly the expected
 * PrimitiveComparisonFilter options: the Accumulo column, the Base64-encoded
 * constant value, the comparison-operation class and the primitive-compare class.
 * Fails if any of the four expected option keys is absent or has the wrong value.
 */
private void assertIteratorOptions(IteratorSetting is, String expectedColumn,
    String expectedConstVal, String expectedCompareOptClass, String expectedPCompareClass) {
  boolean foundQual = false;
  boolean foundConst = false;
  boolean foundCOpt = false;
  boolean foundPCompare = false;
  for (Map.Entry<String, String> option : is.getOptions().entrySet()) {
    String optKey = option.getKey();
    if (optKey.equals(PrimitiveComparisonFilter.COLUMN)) {
      foundQual = true;
      assertEquals(expectedColumn, option.getValue());
    } else if (optKey.equals(PrimitiveComparisonFilter.CONST_VAL)) {
      foundConst = true;
      assertEquals(expectedConstVal, option.getValue());
    } else if (optKey.equals(PrimitiveComparisonFilter.COMPARE_OPT_CLASS)) {
      foundCOpt = true;
      assertEquals(expectedCompareOptClass, option.getValue());
    } else if (optKey.equals(PrimitiveComparisonFilter.P_COMPARE_CLASS)) {
      foundPCompare = true;
      assertEquals(expectedPCompareClass, option.getValue());
    }
  }
  // Logical && rather than the original bitwise '&' (same result on booleans, but
  // short-circuit is the idiomatic form)
  assertTrue(foundQual && foundConst && foundCOpt && foundPCompare);
}
Usage example of org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqualOrLessThan in the Apache Hive project.
From the class TestAccumuloPredicateHandler, method testDisjointRanges.
@Test
public void testDisjointRanges() throws SerDeException {
  // Leaf predicate: rid <= 'aaa'
  List<ExprNodeDesc> lessThanOrEqualChildren = Lists.newArrayList();
  lessThanOrEqualChildren.add(
      new ExprNodeColumnDesc(TypeInfoFactory.stringTypeInfo, "rid", null, false));
  lessThanOrEqualChildren.add(
      new ExprNodeConstantDesc(TypeInfoFactory.stringTypeInfo, "aaa"));
  ExprNodeDesc lessThanOrEqualNode = new ExprNodeGenericFuncDesc(
      TypeInfoFactory.stringTypeInfo, new GenericUDFOPEqualOrLessThan(), lessThanOrEqualChildren);
  assertNotNull(lessThanOrEqualNode);

  // Leaf predicate: rid > 'bbb'
  List<ExprNodeDesc> greaterThanChildren = Lists.newArrayList();
  greaterThanChildren.add(
      new ExprNodeColumnDesc(TypeInfoFactory.stringTypeInfo, "rid", null, false));
  greaterThanChildren.add(
      new ExprNodeConstantDesc(TypeInfoFactory.stringTypeInfo, "bbb"));
  ExprNodeDesc greaterThanNode = new ExprNodeGenericFuncDesc(
      TypeInfoFactory.stringTypeInfo, new GenericUDFOPGreaterThan(), greaterThanChildren);
  assertNotNull(greaterThanNode);

  // Conjunction: rid <= 'aaa' AND rid > 'bbb' — the row ranges cannot intersect
  List<ExprNodeDesc> conjuncts = Lists.newArrayList();
  conjuncts.add(lessThanOrEqualNode);
  conjuncts.add(greaterThanNode);
  ExprNodeGenericFuncDesc conjunction = new ExprNodeGenericFuncDesc(
      TypeInfoFactory.stringTypeInfo, new GenericUDFOPAnd(), conjuncts);
  conf.set(TableScanDesc.FILTER_EXPR_CONF_STR,
      SerializationUtilities.serializeExpression(conjunction));

  // Impossible to produce a Range for disjoint row-key predicates
  Collection<Range> ranges = handler.getRanges(conf, columnMapper);
  assertEquals(0, ranges.size());
}
Usage example of org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqualOrLessThan in the Apache Hive project.
From the class TestOrcSplitElimination, method testSplitEliminationComplexExpr.
@Test
public void testSplitEliminationComplexExpr() throws Exception {
  ObjectInspector inspector = createIO();
  Writer writer = OrcFile.createWriter(fs, testFilePath, conf, inspector, 100000,
      CompressionKind.NONE, 10000, 10000);
  writeData(writer);
  writer.close();
  HiveConf.setLongVar(conf, HiveConf.ConfVars.MAPREDMINSPLITSIZE, 1000);
  HiveConf.setLongVar(conf, HiveConf.ConfVars.MAPREDMAXSPLITSIZE, 150000);
  InputFormat<?, ?> in = new OrcInputFormat();
  FileInputFormat.setInputPaths(conf, testFilePath.toString());

  // predicate expression: userid <= 100 and subtype <= 1000.0
  GenericUDF udf = new GenericUDFOPEqualOrLessThan();
  List<ExprNodeDesc> childExpr = Lists.newArrayList();
  ExprNodeColumnDesc col = new ExprNodeColumnDesc(Long.class, "userid", "T", false);
  ExprNodeConstantDesc con = new ExprNodeConstantDesc(100);
  childExpr.add(col);
  childExpr.add(con);
  ExprNodeGenericFuncDesc en = new ExprNodeGenericFuncDesc(inspector, udf, childExpr);
  GenericUDF udf1 = new GenericUDFOPEqualOrLessThan();
  List<ExprNodeDesc> childExpr1 = Lists.newArrayList();
  ExprNodeColumnDesc col1 = new ExprNodeColumnDesc(Double.class, "subtype", "T", false);
  ExprNodeConstantDesc con1 = new ExprNodeConstantDesc(1000.0);
  childExpr1.add(col1);
  childExpr1.add(con1);
  ExprNodeGenericFuncDesc en1 = new ExprNodeGenericFuncDesc(inspector, udf1, childExpr1);
  GenericUDF udf2 = new GenericUDFOPAnd();
  List<ExprNodeDesc> childExpr2 = Lists.newArrayList();
  childExpr2.add(en);
  childExpr2.add(en1);
  ExprNodeGenericFuncDesc en2 = new ExprNodeGenericFuncDesc(inspector, udf2, childExpr2);
  InputSplit[] splits = getSplitsForComplexFilter(in, en2);
  assertEquals(2, splits.length);

  // userid <= 2 and subtype <= 0.0
  con = new ExprNodeConstantDesc(2);
  childExpr.set(1, con);
  en = new ExprNodeGenericFuncDesc(inspector, udf, childExpr);
  con1 = new ExprNodeConstantDesc(0.0);
  childExpr1.set(1, con1);
  en1 = new ExprNodeGenericFuncDesc(inspector, udf1, childExpr1);
  childExpr2.set(0, en);
  childExpr2.set(1, en1);
  en2 = new ExprNodeGenericFuncDesc(inspector, udf2, childExpr2);
  splits = getSplitsForComplexFilter(in, en2);
  // no stripe will satisfy the predicate
  assertEquals(0, splits.length);

  // userid <= 2 and subtype <= 1.0
  con = new ExprNodeConstantDesc(2);
  childExpr.set(1, con);
  en = new ExprNodeGenericFuncDesc(inspector, udf, childExpr);
  con1 = new ExprNodeConstantDesc(1.0);
  childExpr1.set(1, con1);
  en1 = new ExprNodeGenericFuncDesc(inspector, udf1, childExpr1);
  childExpr2.set(0, en);
  childExpr2.set(1, en1);
  en2 = new ExprNodeGenericFuncDesc(inspector, udf2, childExpr2);
  splits = getSplitsForComplexFilter(in, en2);
  // only first stripe will satisfy condition and hence single split
  assertEquals(1, splits.length);

  // userid == 13 and subtype <= 80.0
  udf = new GenericUDFOPEqual();
  con = new ExprNodeConstantDesc(13);
  childExpr.set(1, con);
  en = new ExprNodeGenericFuncDesc(inspector, udf, childExpr);
  con1 = new ExprNodeConstantDesc(80.0);
  childExpr1.set(1, con1);
  en1 = new ExprNodeGenericFuncDesc(inspector, udf1, childExpr1);
  childExpr2.set(0, en);
  childExpr2.set(1, en1);
  en2 = new ExprNodeGenericFuncDesc(inspector, udf2, childExpr2);
  splits = getSplitsForComplexFilter(in, en2);
  // first two stripes satisfy the condition and merge into one split; the remaining
  // stripe forms a second split (original comment misleadingly said "single split")
  assertEquals(2, splits.length);

  // userid == 13 and subtype == 80.0
  udf = new GenericUDFOPEqual();
  con = new ExprNodeConstantDesc(13);
  childExpr.set(1, con);
  en = new ExprNodeGenericFuncDesc(inspector, udf, childExpr);
  udf1 = new GenericUDFOPEqual();
  con1 = new ExprNodeConstantDesc(80.0);
  childExpr1.set(1, con1);
  en1 = new ExprNodeGenericFuncDesc(inspector, udf1, childExpr1);
  childExpr2.set(0, en);
  childExpr2.set(1, en1);
  en2 = new ExprNodeGenericFuncDesc(inspector, udf2, childExpr2);
  splits = getSplitsForComplexFilter(in, en2);
  // only the second stripe satisfies the condition, hence a single split
  assertEquals(1, splits.length);
}

/**
 * Serializes the given predicate into the job configuration (as the Kryo-serialized
 * filter expression ORC reads for SARG-based split elimination) and recomputes the
 * input splits. Extracted to remove the serialize/set/getSplits triplet that was
 * repeated for every predicate variant.
 */
private InputSplit[] getSplitsForComplexFilter(InputFormat<?, ?> in, ExprNodeGenericFuncDesc filter)
    throws Exception {
  String sargStr = SerializationUtilities.serializeExpression(filter);
  conf.set("hive.io.filter.expr.serialized", sargStr);
  return in.getSplits(conf, 1);
}
Usage example of org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqualOrLessThan in the Apache Hive project.
From the class TestOrcSplitElimination, method testSplitEliminationSmallMaxSplit.
@Test
public void testSplitEliminationSmallMaxSplit() throws Exception {
  ObjectInspector inspector = createIO();
  Writer writer = OrcFile.createWriter(fs, testFilePath, conf, inspector, 100000,
      CompressionKind.NONE, 10000, 10000);
  writeData(writer);
  writer.close();
  // Small max split size: each surviving stripe tends to become its own split
  HiveConf.setLongVar(conf, HiveConf.ConfVars.MAPREDMINSPLITSIZE, 1000);
  HiveConf.setLongVar(conf, HiveConf.ConfVars.MAPREDMAXSPLITSIZE, 5000);
  InputFormat<?, ?> in = new OrcInputFormat();
  FileInputFormat.setInputPaths(conf, testFilePath.toString());

  GenericUDF udf = new GenericUDFOPEqualOrLessThan();
  List<ExprNodeDesc> childExpr = Lists.newArrayList();
  createTestSarg(inspector, udf, childExpr);
  InputSplit[] splits = in.getSplits(conf, 1);
  assertEquals(5, splits.length);

  // { userid <= threshold, expected number of splits }; raising the threshold lets
  // one more stripe survive predicate elimination each time
  int[][] thresholdToExpectedSplits =
      { { 1, 0 }, { 2, 1 }, { 5, 2 }, { 13, 3 }, { 29, 4 }, { 70, 5 } };
  for (int[] testCase : thresholdToExpectedSplits) {
    childExpr.set(1, new ExprNodeConstantDesc(testCase[0]));
    ExprNodeGenericFuncDesc en = new ExprNodeGenericFuncDesc(inspector, udf, childExpr);
    String sargStr = SerializationUtilities.serializeExpression(en);
    conf.set("hive.io.filter.expr.serialized", sargStr);
    splits = in.getSplits(conf, 1);
    assertEquals(testCase[1], splits.length);
  }
}
Usage example of org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqualOrLessThan in the Apache Hive project.
From the class TestOrcSplitElimination, method testSplitEliminationLargeMaxSplit.
@Test
public void testSplitEliminationLargeMaxSplit() throws Exception {
  ObjectInspector inspector = createIO();
  Writer writer = OrcFile.createWriter(fs, testFilePath, conf, inspector, 100000,
      CompressionKind.NONE, 10000, 10000);
  writeData(writer);
  writer.close();
  // Large max split size: consecutive surviving stripes merge into a single split
  HiveConf.setLongVar(conf, HiveConf.ConfVars.MAPREDMINSPLITSIZE, 1000);
  HiveConf.setLongVar(conf, HiveConf.ConfVars.MAPREDMAXSPLITSIZE, 150000);
  InputFormat<?, ?> in = new OrcInputFormat();
  FileInputFormat.setInputPaths(conf, testFilePath.toString());

  GenericUDF udf = new GenericUDFOPEqualOrLessThan();
  List<ExprNodeDesc> childExpr = Lists.newArrayList();
  createTestSarg(inspector, udf, childExpr);
  InputSplit[] splits = in.getSplits(conf, 1);
  assertEquals(2, splits.length);

  // { userid <= threshold, expected number of splits }. Adjacent surviving stripes
  // merge into one split; a surviving non-adjacent tail stripe is a separate split.
  int[][] thresholdToExpectedSplits = {
      { 0, 0 },   // no stripe satisfies the condition
      { 2, 1 },   // only the first stripe satisfies the condition: single split
      { 5, 2 },   // first stripe passes as one split; the last stripe is a separate split
      { 13, 2 },  // first 2 stripes merge into one split; the last stripe is separate
      { 29, 2 },  // first 3 stripes merge into one split; the last stripe is separate
      { 70, 2 }   // leading stripes merge into one split; the last two stripes form another
  };
  for (int[] testCase : thresholdToExpectedSplits) {
    childExpr.set(1, new ExprNodeConstantDesc(testCase[0]));
    ExprNodeGenericFuncDesc en = new ExprNodeGenericFuncDesc(inspector, udf, childExpr);
    String sargStr = SerializationUtilities.serializeExpression(en);
    conf.set("hive.io.filter.expr.serialized", sargStr);
    splits = in.getSplits(conf, 1);
    assertEquals(testCase[1], splits.length);
  }
}
Aggregations