Search in sources :

Example 1 with GenericUDFOPLessThan

Use of org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPLessThan in the project hive by apache.

In the class TestAccumuloRangeGenerator, method testDateRangeConjunction.

/**
 * Verifies that the conjunction {@code rid >= '2014-01-01' AND rid < '2014-07-01'}, built with
 * date-typed constants, makes the graph walk over {@link AccumuloRangeGenerator} produce the
 * single half-open range {@code [2014-01-01, 2014-07-01)}.
 *
 * @throws Exception if building the expression tree or walking it fails
 */
@Test
public void testDateRangeConjunction() throws Exception {
    // rowId >= '2014-01-01'
    ExprNodeDesc column = new ExprNodeColumnDesc(TypeInfoFactory.stringTypeInfo, "rid", null, false);
    ExprNodeDesc constant = new ExprNodeConstantDesc(TypeInfoFactory.dateTypeInfo, Date.valueOf("2014-01-01"));
    List<ExprNodeDesc> children = Lists.newArrayList();
    children.add(column);
    children.add(constant);
    ExprNodeDesc node = new ExprNodeGenericFuncDesc(TypeInfoFactory.stringTypeInfo, new GenericUDFOPEqualOrGreaterThan(), children);
    assertNotNull(node);
    // rowId < '2014-07-01' (strictly less than, hence the exclusive end key expected below)
    ExprNodeDesc column2 = new ExprNodeColumnDesc(TypeInfoFactory.stringTypeInfo, "rid", null, false);
    ExprNodeDesc constant2 = new ExprNodeConstantDesc(TypeInfoFactory.dateTypeInfo, Date.valueOf("2014-07-01"));
    List<ExprNodeDesc> children2 = Lists.newArrayList();
    children2.add(column2);
    children2.add(constant2);
    ExprNodeDesc node2 = new ExprNodeGenericFuncDesc(TypeInfoFactory.stringTypeInfo, new GenericUDFOPLessThan(), children2);
    assertNotNull(node2);
    // And UDF combining both comparisons
    List<ExprNodeDesc> bothFilters = Lists.newArrayList();
    bothFilters.add(node);
    bothFilters.add(node2);
    ExprNodeGenericFuncDesc both = new ExprNodeGenericFuncDesc(TypeInfoFactory.stringTypeInfo, new GenericUDFOPAnd(), bothFilters);
    // Should generate [2014-01-01, 2014-07-01)
    List<Range> expectedRanges = Arrays.asList(new Range(new Key("2014-01-01"), true, new Key("2014-07-01"), false));
    // The range generator is handed to the dispatcher together with an empty rule map.
    AccumuloRangeGenerator rangeGenerator = new AccumuloRangeGenerator(handler, rowIdMapping, "rid");
    Dispatcher disp = new DefaultRuleDispatcher(rangeGenerator, Collections.<Rule, NodeProcessor>emptyMap(), null);
    GraphWalker ogw = new DefaultGraphWalker(disp);
    ArrayList<Node> topNodes = new ArrayList<Node>();
    topNodes.add(both);
    HashMap<Node, Object> nodeOutput = new HashMap<Node, Object>();
    // A SemanticException propagates as-is: the method already declares "throws Exception",
    // so wrapping it in a RuntimeException would only obscure the original failure type.
    ogw.startWalking(topNodes, nodeOutput);
    // The walker stores the value computed for each visited node; look up the AND node's entry.
    Object result = nodeOutput.get(both);
    Assert.assertNotNull(result);
    Assert.assertTrue("Result from graph walk was not a List", result instanceof List);
    @SuppressWarnings("unchecked") List<Range> actualRanges = (List<Range>) result;
    Assert.assertEquals(expectedRanges, actualRanges);
}
Also used : HashMap(java.util.HashMap) DefaultGraphWalker(org.apache.hadoop.hive.ql.lib.DefaultGraphWalker) Node(org.apache.hadoop.hive.ql.lib.Node) ArrayList(java.util.ArrayList) Dispatcher(org.apache.hadoop.hive.ql.lib.Dispatcher) DefaultRuleDispatcher(org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher) ExprNodeColumnDesc(org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc) ArrayList(java.util.ArrayList) List(java.util.List) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) DefaultGraphWalker(org.apache.hadoop.hive.ql.lib.DefaultGraphWalker) GraphWalker(org.apache.hadoop.hive.ql.lib.GraphWalker) SemanticException(org.apache.hadoop.hive.ql.parse.SemanticException) ExprNodeConstantDesc(org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc) DefaultRuleDispatcher(org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher) ExprNodeGenericFuncDesc(org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc) Range(org.apache.accumulo.core.data.Range) GenericUDFOPLessThan(org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPLessThan) GenericUDFOPEqualOrGreaterThan(org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqualOrGreaterThan) Key(org.apache.accumulo.core.data.Key) GenericUDFOPAnd(org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPAnd) Test(org.junit.Test)

Example 2 with GenericUDFOPLessThan

Use of org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPLessThan in the project hive by apache.

In the class TestAccumuloPredicateHandler, method testRowRangeGeneration.

/**
 * Checks that the filter {@code 100 < key}, with the row id mapped as {@code :rowID} and
 * {@link ColumnEncoding#STRING} as the encoding handed to the column mapper, makes the handler
 * return exactly one range: exclusive start "100" and no end key.
 *
 * @throws SerDeException declared by the test; presumably raised by the handler on failure
 */
@Test
public void testRowRangeGeneration() throws SerDeException {
    final String mapping = ":rowID,cf:f1";
    final List<String> names = Arrays.asList("key", "column");
    final List<TypeInfo> types = Arrays.<TypeInfo>asList(TypeInfoFactory.stringTypeInfo, TypeInfoFactory.stringTypeInfo);

    // Publish the table layout in the configuration and build the matching column mapper.
    conf.set(serdeConstants.LIST_COLUMNS, Joiner.on(',').join(names));
    conf.set(serdeConstants.LIST_COLUMN_TYPES, "string,string");
    conf.set(AccumuloSerDeParameters.COLUMN_MAPPINGS, mapping);
    columnMapper = new ColumnMapper(mapping, ColumnEncoding.STRING.getName(), names, types);

    // Predicate 100 < key: the constant is the left operand, the column the right one.
    ExprNodeDesc hundred = new ExprNodeConstantDesc(TypeInfoFactory.intTypeInfo, 100);
    ExprNodeDesc keyColumn = new ExprNodeColumnDesc(TypeInfoFactory.intTypeInfo, "key", null, false);
    List<ExprNodeDesc> operands = Lists.newArrayList(hundred, keyColumn);
    ExprNodeGenericFuncDesc lessThan = new ExprNodeGenericFuncDesc(TypeInfoFactory.stringTypeInfo, new GenericUDFOPLessThan(), operands);
    assertNotNull(lessThan);

    // The handler reads the serialized filter expression back from the configuration.
    conf.set(TableScanDesc.FILTER_EXPR_CONF_STR, SerializationUtilities.serializeExpression(lessThan));

    // Should make (100, +inf)
    List<Range> computed = handler.getRanges(conf, columnMapper);
    Assert.assertEquals(1, computed.size());
    Range expected = new Range(new Text("100"), false, null, false);
    Assert.assertEquals(expected, computed.get(0));
}
Also used : ExprNodeConstantDesc(org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc) ExprNodeGenericFuncDesc(org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc) Text(org.apache.hadoop.io.Text) Range(org.apache.accumulo.core.data.Range) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) GenericUDFOPLessThan(org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPLessThan) ExprNodeColumnDesc(org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) ColumnMapper(org.apache.hadoop.hive.accumulo.columns.ColumnMapper) Test(org.junit.Test)

Example 3 with GenericUDFOPLessThan

Use of org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPLessThan in the project hive by apache.

In the class TestAccumuloPredicateHandler, method testBinaryRangeGeneration.

/**
 * Checks that the filter {@code 100 < key}, with the row id mapped as {@code :rowID#b}, makes
 * the handler return exactly one range whose exclusive start is the byte form of 100 written by
 * {@code LazyUtils.writePrimitive}, and which has no end key.
 *
 * @throws Exception if serializing the constant or computing the ranges fails
 */
@Test
public void testBinaryRangeGeneration() throws Exception {
    final String mapping = ":rowID#b,cf:f1";
    final List<String> names = Arrays.asList("key", "column");
    final List<TypeInfo> types = Arrays.<TypeInfo>asList(TypeInfoFactory.intTypeInfo, TypeInfoFactory.stringTypeInfo);

    // Publish the table layout in the configuration and build the matching column mapper.
    conf.set(serdeConstants.LIST_COLUMNS, Joiner.on(',').join(names));
    conf.set(serdeConstants.LIST_COLUMN_TYPES, "int,string");
    conf.set(AccumuloSerDeParameters.COLUMN_MAPPINGS, mapping);
    columnMapper = new ColumnMapper(mapping, ColumnEncoding.STRING.getName(), names, types);

    final int threshold = 100;

    // Write the int through LazyUtils to obtain the bytes expected as the range start.
    JavaIntObjectInspector inspector = (JavaIntObjectInspector) PrimitiveObjectInspectorFactory.getPrimitiveJavaObjectInspector(TypeInfoFactory.getPrimitiveTypeInfo(serdeConstants.INT_TYPE_NAME));
    ByteArrayOutputStream encoded = new ByteArrayOutputStream();
    LazyUtils.writePrimitive(encoded, threshold, inspector);

    // Predicate 100 < key: the constant is the left operand, the column the right one.
    ExprNodeDesc limit = new ExprNodeConstantDesc(TypeInfoFactory.intTypeInfo, threshold);
    ExprNodeDesc keyColumn = new ExprNodeColumnDesc(TypeInfoFactory.intTypeInfo, "key", null, false);
    List<ExprNodeDesc> operands = Lists.newArrayList(limit, keyColumn);
    ExprNodeGenericFuncDesc lessThan = new ExprNodeGenericFuncDesc(TypeInfoFactory.stringTypeInfo, new GenericUDFOPLessThan(), operands);
    assertNotNull(lessThan);

    // The handler reads the serialized filter expression back from the configuration.
    conf.set(TableScanDesc.FILTER_EXPR_CONF_STR, SerializationUtilities.serializeExpression(lessThan));

    // Should make (100, +inf)
    List<Range> computed = handler.getRanges(conf, columnMapper);
    Assert.assertEquals(1, computed.size());
    Range expected = new Range(new Text(encoded.toByteArray()), false, null, false);
    Assert.assertEquals(expected, computed.get(0));
}
Also used : ExprNodeConstantDesc(org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc) JavaIntObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.JavaIntObjectInspector) ExprNodeGenericFuncDesc(org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc) Text(org.apache.hadoop.io.Text) ByteArrayOutputStream(java.io.ByteArrayOutputStream) Range(org.apache.accumulo.core.data.Range) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) GenericUDFOPLessThan(org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPLessThan) ExprNodeColumnDesc(org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) ColumnMapper(org.apache.hadoop.hive.accumulo.columns.ColumnMapper) Test(org.junit.Test)

Example 4 with GenericUDFOPLessThan

Use of org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPLessThan in the project mongo-hadoop by mongodb.

In the class HiveMongoInputFormatTest, method testTranslateConjoinedQuery.

/**
 * Checks the filter that {@code filterForExpr} produces for the Hive predicate
 * {@code i < 50 AND j > 20}: a single document holding one sub-document per column,
 * keyed {@code mongo_i} and {@code mongo_j}.
 */
@Test
public void testTranslateConjoinedQuery() {
    // i < 50
    ExprNodeGenericFuncDesc lessThanFifty = new ExprNodeGenericFuncDesc(
        TypeInfoFactory.booleanTypeInfo,
        new GenericUDFOPLessThan(),
        Arrays.<ExprNodeDesc>asList(new ExprNodeColumnDesc(new SimpleMockColumnInfo("i")), new ExprNodeConstantDesc(50)));

    // j > 20
    ExprNodeGenericFuncDesc greaterThanTwenty = new ExprNodeGenericFuncDesc(
        TypeInfoFactory.booleanTypeInfo,
        new GenericUDFOPGreaterThan(),
        Arrays.<ExprNodeDesc>asList(new ExprNodeColumnDesc(new SimpleMockColumnInfo("j")), new ExprNodeConstantDesc(20)));

    // i < 50 AND j > 20
    ExprNodeGenericFuncDesc conjunction = new ExprNodeGenericFuncDesc(
        TypeInfoFactory.booleanTypeInfo,
        new GenericUDFOPAnd(),
        Arrays.<ExprNodeDesc>asList(lessThanFifty, greaterThanTwenty));

    // Expected filter: {"mongo_i": {"$lt": 50}, "mongo_j": {"$gt": 20}}
    // NOTE(review): "mongo_i"/"mongo_j" are presumably the Mongo field names that the Hive
    // columns i and j map to in this test's fixture; confirm against the class setup.
    BasicDBObjectBuilder expectedFilter = new BasicDBObjectBuilder();
    expectedFilter.push("mongo_i").add("$lt", 50).pop();
    expectedFilter.push("mongo_j").add("$gt", 20).pop();

    assertEquals(expectedFilter.get(), filterForExpr(conjunction));
}
Also used : GenericUDFOPGreaterThan(org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPGreaterThan) ExprNodeConstantDesc(org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc) BasicDBObjectBuilder(com.mongodb.BasicDBObjectBuilder) ExprNodeColumnDesc(org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc) ExprNodeGenericFuncDesc(org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) GenericUDFOPLessThan(org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPLessThan) GenericUDFOPAnd(org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPAnd) HiveTest(com.mongodb.hadoop.hive.HiveTest) Test(org.junit.Test)

Example 5 with GenericUDFOPLessThan

Use of org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPLessThan in the project hive by apache.

In the class TestAccumuloPredicateHandler, method rangeLessThan.

/**
 * Checks that the filter {@code rid < 'aaa'} makes the handler return a single range that
 * excludes "aaa" and every key sorting after it, while still containing smaller keys such as "aa".
 *
 * @throws SerDeException declared by the test; presumably raised by the handler on failure
 */
@Test
public void rangeLessThan() throws SerDeException {
    // rid < 'aaa'
    ExprNodeDesc column = new ExprNodeColumnDesc(TypeInfoFactory.stringTypeInfo, "rid", null, false);
    ExprNodeDesc constant = new ExprNodeConstantDesc(TypeInfoFactory.stringTypeInfo, "aaa");
    List<ExprNodeDesc> children = Lists.newArrayList();
    children.add(column);
    children.add(constant);
    ExprNodeGenericFuncDesc node = new ExprNodeGenericFuncDesc(TypeInfoFactory.stringTypeInfo, new GenericUDFOPLessThan(), children);
    assertNotNull(node);
    // The handler reads the serialized filter expression back from the configuration.
    String filterExpr = SerializationUtilities.serializeExpression(node);
    conf.set(TableScanDesc.FILTER_EXPR_CONF_STR, filterExpr);
    Collection<Range> ranges = handler.getRanges(conf, columnMapper);
    // JUnit's contract is assertEquals(expected, actual); the reversed order would produce a
    // misleading "expected:<n> but was:<1>" message on failure.
    assertEquals(1, ranges.size());
    Range range = ranges.iterator().next();
    // The test expects the start to be reported as inclusive and the end ("aaa") as exclusive.
    assertTrue(range.isStartKeyInclusive());
    assertFalse(range.isEndKeyInclusive());
    // "aaa" itself and anything sorting after it fall outside the range ...
    assertFalse(range.contains(new Key(new Text("aaa"))));
    assertTrue(range.afterEndKey(new Key(new Text("ccccc"))));
    // ... while a key sorting before "aaa" is inside it.
    assertTrue(range.contains(new Key(new Text("aa"))));
    assertTrue(range.afterEndKey(new Key(new Text("aab"))));
    assertTrue(range.afterEndKey(new Key(new Text("aaa"))));
}
Also used : ExprNodeConstantDesc(org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc) ExprNodeColumnDesc(org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc) ExprNodeGenericFuncDesc(org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc) Text(org.apache.hadoop.io.Text) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) Range(org.apache.accumulo.core.data.Range) GenericUDFOPLessThan(org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPLessThan) Key(org.apache.accumulo.core.data.Key) Test(org.junit.Test)

Aggregations

ExprNodeColumnDesc (org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc)8 ExprNodeConstantDesc (org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc)8 ExprNodeDesc (org.apache.hadoop.hive.ql.plan.ExprNodeDesc)8 ExprNodeGenericFuncDesc (org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc)8 GenericUDFOPLessThan (org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPLessThan)8 Test (org.junit.Test)8 Range (org.apache.accumulo.core.data.Range)5 GenericUDFOPAnd (org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPAnd)5 ArrayList (java.util.ArrayList)3 Key (org.apache.accumulo.core.data.Key)3 GenericUDFOPGreaterThan (org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPGreaterThan)3 Text (org.apache.hadoop.io.Text)3 ColumnMapper (org.apache.hadoop.hive.accumulo.columns.ColumnMapper)2 DynamicValueVectorExpression (org.apache.hadoop.hive.ql.exec.vector.expressions.DynamicValueVectorExpression)2 VectorExpression (org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression)2 GenericUDFOPEqualOrGreaterThan (org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqualOrGreaterThan)2 GenericUDFOPOr (org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPOr)2 TypeInfo (org.apache.hadoop.hive.serde2.typeinfo.TypeInfo)2 BasicDBObjectBuilder (com.mongodb.BasicDBObjectBuilder)1 HiveTest (com.mongodb.hadoop.hive.HiveTest)1