
Example 1 with Range

Use of org.apache.accumulo.core.data.Range in project hive by apache.

From the class TestAccumuloRangeGenerator, the method testRangeConjunctionWithDisjunction:

@Test
public void testRangeConjunctionWithDisjunction() throws Exception {
    // rowId >= 'h'
    ExprNodeDesc column = new ExprNodeColumnDesc(TypeInfoFactory.stringTypeInfo, "rid", null, false);
    ExprNodeDesc constant = new ExprNodeConstantDesc(TypeInfoFactory.stringTypeInfo, "h");
    List<ExprNodeDesc> children = Lists.newArrayList();
    children.add(column);
    children.add(constant);
    ExprNodeDesc node = new ExprNodeGenericFuncDesc(TypeInfoFactory.stringTypeInfo, new GenericUDFOPEqualOrGreaterThan(), children);
    assertNotNull(node);
    // rowId <= 'd'
    ExprNodeDesc column2 = new ExprNodeColumnDesc(TypeInfoFactory.stringTypeInfo, "rid", null, false);
    ExprNodeDesc constant2 = new ExprNodeConstantDesc(TypeInfoFactory.stringTypeInfo, "d");
    List<ExprNodeDesc> children2 = Lists.newArrayList();
    children2.add(column2);
    children2.add(constant2);
    ExprNodeDesc node2 = new ExprNodeGenericFuncDesc(TypeInfoFactory.stringTypeInfo, new GenericUDFOPEqualOrLessThan(), children2);
    assertNotNull(node2);
    // rowId >= 'q'
    ExprNodeDesc column3 = new ExprNodeColumnDesc(TypeInfoFactory.stringTypeInfo, "rid", null, false);
    ExprNodeDesc constant3 = new ExprNodeConstantDesc(TypeInfoFactory.stringTypeInfo, "q");
    List<ExprNodeDesc> children3 = Lists.newArrayList();
    children3.add(column3);
    children3.add(constant3);
    ExprNodeDesc node3 = new ExprNodeGenericFuncDesc(TypeInfoFactory.stringTypeInfo, new GenericUDFOPEqualOrGreaterThan(), children3);
    assertNotNull(node3);
    // Or UDF, (rowId <= 'd' or rowId >= 'q')
    List<ExprNodeDesc> orFilters = Lists.newArrayList();
    orFilters.add(node2);
    orFilters.add(node3);
    ExprNodeGenericFuncDesc orNode = new ExprNodeGenericFuncDesc(TypeInfoFactory.stringTypeInfo, new GenericUDFOPOr(), orFilters);
    // And UDF, (rowId >= 'h' and (rowId <= 'd' or rowId >= 'q'))
    List<ExprNodeDesc> andFilters = Lists.newArrayList();
    andFilters.add(node);
    andFilters.add(orNode);
    ExprNodeGenericFuncDesc both = new ExprNodeGenericFuncDesc(TypeInfoFactory.stringTypeInfo, new GenericUDFOPAnd(), andFilters);
    // Should generate ['q', +inf)
    List<Range> expectedRanges = Arrays.asList(new Range(new Key("q"), true, null, false));
    AccumuloRangeGenerator rangeGenerator = new AccumuloRangeGenerator(handler, rowIdMapping, "rid");
    Dispatcher disp = new DefaultRuleDispatcher(rangeGenerator, Collections.<Rule, NodeProcessor>emptyMap(), null);
    GraphWalker ogw = new DefaultGraphWalker(disp);
    ArrayList<Node> topNodes = new ArrayList<Node>();
    topNodes.add(both);
    HashMap<Node, Object> nodeOutput = new HashMap<Node, Object>();
    try {
        ogw.startWalking(topNodes, nodeOutput);
    } catch (SemanticException ex) {
        throw new RuntimeException(ex);
    }
    Object result = nodeOutput.get(both);
    Assert.assertNotNull(result);
    Assert.assertTrue("Result from graph walk was not a List", result instanceof List);
    @SuppressWarnings("unchecked") List<Range> actualRanges = (List<Range>) result;
    Assert.assertEquals(expectedRanges, actualRanges);
}
Also used: ArrayList (java.util.ArrayList), HashMap (java.util.HashMap), List (java.util.List), Key (org.apache.accumulo.core.data.Key), Range (org.apache.accumulo.core.data.Range), DefaultGraphWalker, DefaultRuleDispatcher, Dispatcher, GraphWalker, Node (all in org.apache.hadoop.hive.ql.lib), SemanticException (org.apache.hadoop.hive.ql.parse.SemanticException), ExprNodeColumnDesc, ExprNodeConstantDesc, ExprNodeDesc, ExprNodeGenericFuncDesc (all in org.apache.hadoop.hive.ql.plan), GenericUDFOPAnd, GenericUDFOPEqualOrGreaterThan, GenericUDFOPEqualOrLessThan, GenericUDFOPOr (all in org.apache.hadoop.hive.ql.udf.generic), Test (org.junit.Test)
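
The expected result is plain interval algebra: [h, +inf) ∩ ((-inf, d] ∪ [q, +inf)) = [q, +inf), because [h, +inf) and (-inf, d] do not overlap. The same arithmetic can be reproduced with Accumulo's Range.clip(Range, boolean), which returns the intersection of two ranges, or null for disjoint ranges when the flag is true. A minimal standalone sketch (the class name and row values are ours, not part of the Hive test):

import org.apache.accumulo.core.data.Range;
import org.apache.hadoop.io.Text;

public class RangeAlgebraSketch {
    public static void main(String[] args) {
        Range geH = new Range(new Text("h"), null); // rowId >= 'h'  ->  [h, +inf)
        Range leD = new Range(null, new Text("d")); // rowId <= 'd'  ->  (-inf, d]
        Range geQ = new Range(new Text("q"), null); // rowId >= 'q'  ->  [q, +inf)

        // AND distributes over OR, so clip each OR branch against [h, +inf).
        System.out.println(geH.clip(leD, true)); // null: the branches are disjoint
        System.out.println(geH.clip(geQ, true)); // [q, +inf): the only surviving range
    }
}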

Example 2 with Range

Use of org.apache.accumulo.core.data.Range in project hive by apache.

From the class TestAccumuloRangeGenerator, the method testRangeConjunction:

@Test
public void testRangeConjunction() throws Exception {
    // rowId >= 'f'
    ExprNodeDesc column = new ExprNodeColumnDesc(TypeInfoFactory.stringTypeInfo, "rid", null, false);
    ExprNodeDesc constant = new ExprNodeConstantDesc(TypeInfoFactory.stringTypeInfo, "f");
    List<ExprNodeDesc> children = Lists.newArrayList();
    children.add(column);
    children.add(constant);
    ExprNodeDesc node = new ExprNodeGenericFuncDesc(TypeInfoFactory.stringTypeInfo, new GenericUDFOPEqualOrGreaterThan(), children);
    assertNotNull(node);
    // rowId <= 'm'
    ExprNodeDesc column2 = new ExprNodeColumnDesc(TypeInfoFactory.stringTypeInfo, "rid", null, false);
    ExprNodeDesc constant2 = new ExprNodeConstantDesc(TypeInfoFactory.stringTypeInfo, "m");
    List<ExprNodeDesc> children2 = Lists.newArrayList();
    children2.add(column2);
    children2.add(constant2);
    ExprNodeDesc node2 = new ExprNodeGenericFuncDesc(TypeInfoFactory.stringTypeInfo, new GenericUDFOPEqualOrLessThan(), children2);
    assertNotNull(node2);
    // And UDF
    List<ExprNodeDesc> bothFilters = Lists.newArrayList();
    bothFilters.add(node);
    bothFilters.add(node2);
    ExprNodeGenericFuncDesc both = new ExprNodeGenericFuncDesc(TypeInfoFactory.stringTypeInfo, new GenericUDFOPAnd(), bothFilters);
    // Should generate [f,m]
    List<Range> expectedRanges = Arrays.asList(new Range(new Key("f"), true, new Key("m\0"), false));
    AccumuloRangeGenerator rangeGenerator = new AccumuloRangeGenerator(handler, rowIdMapping, "rid");
    Dispatcher disp = new DefaultRuleDispatcher(rangeGenerator, Collections.<Rule, NodeProcessor>emptyMap(), null);
    GraphWalker ogw = new DefaultGraphWalker(disp);
    ArrayList<Node> topNodes = new ArrayList<Node>();
    topNodes.add(both);
    HashMap<Node, Object> nodeOutput = new HashMap<Node, Object>();
    try {
        ogw.startWalking(topNodes, nodeOutput);
    } catch (SemanticException ex) {
        throw new RuntimeException(ex);
    }
    Object result = nodeOutput.get(both);
    Assert.assertNotNull(result);
    Assert.assertTrue("Result from graph walk was not a List", result instanceof List);
    @SuppressWarnings("unchecked") List<Range> actualRanges = (List<Range>) result;
    Assert.assertEquals(expectedRanges, actualRanges);
}
Also used: ArrayList (java.util.ArrayList), HashMap (java.util.HashMap), List (java.util.List), Key (org.apache.accumulo.core.data.Key), Range (org.apache.accumulo.core.data.Range), DefaultGraphWalker, DefaultRuleDispatcher, Dispatcher, GraphWalker, Node (all in org.apache.hadoop.hive.ql.lib), SemanticException (org.apache.hadoop.hive.ql.parse.SemanticException), ExprNodeColumnDesc, ExprNodeConstantDesc, ExprNodeDesc, ExprNodeGenericFuncDesc (all in org.apache.hadoop.hive.ql.plan), GenericUDFOPAnd, GenericUDFOPEqualOrGreaterThan, GenericUDFOPEqualOrLessThan (all in org.apache.hadoop.hive.ql.udf.generic), Test (org.junit.Test)
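
Note the expected end key: Key("m\0") marked exclusive. Appending a null byte to the row and excluding the result is how an inclusive row bound is expressed at the Key level, since "m\0" is the first possible key after every key in row 'm'. Accumulo's row-based Range constructor performs the same normalization internally, so the two spellings should compare equal; a short sketch (ours, not from the test):

import org.apache.accumulo.core.data.Key;
import org.apache.accumulo.core.data.Range;
import org.apache.hadoop.io.Text;

public class InclusiveEndSketch {
    public static void main(String[] args) {
        // Key-based form, exactly as the test writes its expected value.
        Range keyBased = new Range(new Key("f"), true, new Key("m\0"), false);
        // Row-based form: rows 'f' through 'm', both inclusive.
        Range rowBased = new Range(new Text("f"), true, new Text("m"), true);
        // The row-based constructor normalizes its inclusive end bound to the
        // same exclusive "m\0" key, so the two ranges should be equal.
        System.out.println(keyBased.equals(rowBased)); // expected: true
    }
}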

Example 3 with Range

Use of org.apache.accumulo.core.data.Range in project hive by apache.

From the class TestAccumuloRangeGenerator, the method testDateRangeConjunction:

@Test
public void testDateRangeConjunction() throws Exception {
    // rowId >= '2014-01-01'
    ExprNodeDesc column = new ExprNodeColumnDesc(TypeInfoFactory.stringTypeInfo, "rid", null, false);
    ExprNodeDesc constant = new ExprNodeConstantDesc(TypeInfoFactory.dateTypeInfo, Date.valueOf("2014-01-01"));
    List<ExprNodeDesc> children = Lists.newArrayList();
    children.add(column);
    children.add(constant);
    ExprNodeDesc node = new ExprNodeGenericFuncDesc(TypeInfoFactory.stringTypeInfo, new GenericUDFOPEqualOrGreaterThan(), children);
    assertNotNull(node);
    // rowId < '2014-07-01' (strict, via GenericUDFOPLessThan)
    ExprNodeDesc column2 = new ExprNodeColumnDesc(TypeInfoFactory.stringTypeInfo, "rid", null, false);
    ExprNodeDesc constant2 = new ExprNodeConstantDesc(TypeInfoFactory.dateTypeInfo, Date.valueOf("2014-07-01"));
    List<ExprNodeDesc> children2 = Lists.newArrayList();
    children2.add(column2);
    children2.add(constant2);
    ExprNodeDesc node2 = new ExprNodeGenericFuncDesc(TypeInfoFactory.stringTypeInfo, new GenericUDFOPLessThan(), children2);
    assertNotNull(node2);
    // And UDF
    List<ExprNodeDesc> bothFilters = Lists.newArrayList();
    bothFilters.add(node);
    bothFilters.add(node2);
    ExprNodeGenericFuncDesc both = new ExprNodeGenericFuncDesc(TypeInfoFactory.stringTypeInfo, new GenericUDFOPAnd(), bothFilters);
    // Should generate [2014-01-01, 2014-07-01)
    List<Range> expectedRanges = Arrays.asList(new Range(new Key("2014-01-01"), true, new Key("2014-07-01"), false));
    AccumuloRangeGenerator rangeGenerator = new AccumuloRangeGenerator(handler, rowIdMapping, "rid");
    Dispatcher disp = new DefaultRuleDispatcher(rangeGenerator, Collections.<Rule, NodeProcessor>emptyMap(), null);
    GraphWalker ogw = new DefaultGraphWalker(disp);
    ArrayList<Node> topNodes = new ArrayList<Node>();
    topNodes.add(both);
    HashMap<Node, Object> nodeOutput = new HashMap<Node, Object>();
    try {
        ogw.startWalking(topNodes, nodeOutput);
    } catch (SemanticException ex) {
        throw new RuntimeException(ex);
    }
    Object result = nodeOutput.get(both);
    Assert.assertNotNull(result);
    Assert.assertTrue("Result from graph walk was not a List", result instanceof List);
    @SuppressWarnings("unchecked") List<Range> actualRanges = (List<Range>) result;
    Assert.assertEquals(expectedRanges, actualRanges);
}
Also used: ArrayList (java.util.ArrayList), HashMap (java.util.HashMap), List (java.util.List), Key (org.apache.accumulo.core.data.Key), Range (org.apache.accumulo.core.data.Range), DefaultGraphWalker, DefaultRuleDispatcher, Dispatcher, GraphWalker, Node (all in org.apache.hadoop.hive.ql.lib), SemanticException (org.apache.hadoop.hive.ql.parse.SemanticException), ExprNodeColumnDesc, ExprNodeConstantDesc, ExprNodeDesc, ExprNodeGenericFuncDesc (all in org.apache.hadoop.hive.ql.plan), GenericUDFOPAnd, GenericUDFOPEqualOrGreaterThan, GenericUDFOPLessThan (all in org.apache.hadoop.hive.ql.udf.generic), Test (org.junit.Test)
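
The Date constants are pushed down in their ISO-8601 string form ("2014-01-01", "2014-07-01"), which is safe against a string row id because zero-padded ISO-8601 dates sort lexicographically in chronological order. A one-assertion check of that property, in the same JUnit style as the tests above (the method is ours, not part of TestAccumuloRangeGenerator):

@Test
public void isoDatesSortChronologically() {
    // Lexicographic order of zero-padded ISO-8601 dates matches their
    // chronological order, so ranges built from date bounds cover
    // exactly the intended string row ids.
    Assert.assertTrue("2014-01-01".compareTo("2014-07-01") < 0);
    Assert.assertTrue("2014-07-01".compareTo("2015-01-01") < 0);
}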

Example 4 with Range

Use of org.apache.accumulo.core.data.Range in project hive by apache.

From the class AccumuloRangeGenerator, the method processAndOpNode:

protected Object processAndOpNode(Node nd, Object[] nodeOutputs) {
    // We might have multiple ranges coming from children
    List<Range> andRanges = null;
    for (Object nodeOutput : nodeOutputs) {
        // A null child output signifies an expression that is irrelevant to
        // the generation of Accumulo Ranges
        if (null == nodeOutput) {
            continue;
        }
        // Lazily create the result list. A null list of Ranges means no
        // restriction has been computed yet, whereas an empty list of Ranges
        // implies that there are no possible Ranges to lookup.
        if (null == andRanges) {
            andRanges = new ArrayList<Range>();
        }
        // The child is a single Range
        if (nodeOutput instanceof Range) {
            Range childRange = (Range) nodeOutput;
            // No existing ranges, just accept the current
            if (andRanges.isEmpty()) {
                andRanges.add(childRange);
            } else {
                // For each range we have, intersect them. If they don't overlap
                // the range can be discarded
                List<Range> newRanges = new ArrayList<Range>();
                for (Range andRange : andRanges) {
                    Range intersectedRange = andRange.clip(childRange, true);
                    if (null != intersectedRange) {
                        newRanges.add(intersectedRange);
                    }
                }
                // Set the newly-constructed ranges as the current state
                andRanges = newRanges;
            }
        } else if (nodeOutput instanceof List) {
            @SuppressWarnings("unchecked") List<Range> childRanges = (List<Range>) nodeOutput;
            // No ranges, use the ranges from the child
            if (andRanges.isEmpty()) {
                andRanges.addAll(childRanges);
            } else {
                List<Range> newRanges = new ArrayList<Range>();
                // Cartesian product of our ranges, to the child ranges
                for (Range andRange : andRanges) {
                    for (Range childRange : childRanges) {
                        Range intersectedRange = andRange.clip(childRange, true);
                        // Retain only valid intersections (discard disjoint ranges)
                        if (null != intersectedRange) {
                            newRanges.add(intersectedRange);
                        }
                    }
                }
                // Set the newly-constructed ranges as the current state
                andRanges = newRanges;
            }
        } else {
            LOG.error("Expected Range from {} but got {}", nd, nodeOutput);
            throw new IllegalArgumentException("Expected Range but got " + nodeOutput.getClass().getName());
        }
    }
    return andRanges;
}
Also used: ArrayList (java.util.ArrayList), List (java.util.List), Range (org.apache.accumulo.core.data.Range)
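
The cartesian-product step is the heart of the AND handling: every range accumulated so far is clipped against every range produced by the next child, and only the non-null (overlapping) intersections survive. A standalone sketch of just that step (the row values are ours, chosen for illustration):

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import org.apache.accumulo.core.data.Range;
import org.apache.hadoop.io.Text;

public class AndClipSketch {
    public static void main(String[] args) {
        // Ranges accumulated from earlier children: [a, f] and [p, z].
        List<Range> andRanges = Arrays.asList(
            new Range(new Text("a"), new Text("f")),
            new Range(new Text("p"), new Text("z")));
        // Ranges produced by the next child: a single [d, q].
        List<Range> childRanges = Arrays.asList(new Range(new Text("d"), new Text("q")));

        List<Range> newRanges = new ArrayList<Range>();
        for (Range andRange : andRanges) {
            for (Range childRange : childRanges) {
                Range intersected = andRange.clip(childRange, true); // null when disjoint
                if (null != intersected) {
                    newRanges.add(intersected);
                }
            }
        }
        // 2 x 1 clips, both overlapping: newRanges now covers [d, f] and [p, q].
        System.out.println(newRanges);
    }
}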

Example 5 with Range

Use of org.apache.accumulo.core.data.Range in project hive by apache.

From the class TestHiveAccumuloTableInputFormat, the method testConfigureAccumuloInputFormat:

@Test
public void testConfigureAccumuloInputFormat() throws Exception {
    AccumuloConnectionParameters accumuloParams = new AccumuloConnectionParameters(conf);
    ColumnMapper columnMapper = new ColumnMapper(conf.get(AccumuloSerDeParameters.COLUMN_MAPPINGS), conf.get(AccumuloSerDeParameters.DEFAULT_STORAGE_TYPE), columnNames, columnTypes);
    Set<Pair<Text, Text>> cfCqPairs = inputformat.getPairCollection(columnMapper.getColumnMappings());
    List<IteratorSetting> iterators = Collections.emptyList();
    Set<Range> ranges = Collections.singleton(new Range());
    String instanceName = "realInstance";
    String zookeepers = "host1:2181,host2:2181,host3:2181";
    ZooKeeperInstance zkInstance = Mockito.mock(ZooKeeperInstance.class);
    HiveAccumuloTableInputFormat mockInputFormat = Mockito.mock(HiveAccumuloTableInputFormat.class);
    // Stub out the ZKI mock
    Mockito.when(zkInstance.getInstanceName()).thenReturn(instanceName);
    Mockito.when(zkInstance.getZooKeepers()).thenReturn(zookeepers);
    // Call out to the real configure method
    Mockito.doCallRealMethod().when(mockInputFormat).configure(conf, zkInstance, con, accumuloParams, columnMapper, iterators, ranges);
    // Also compute the correct cf:cq pairs so we can assert the right argument was passed
    Mockito.doCallRealMethod().when(mockInputFormat).getPairCollection(columnMapper.getColumnMappings());
    mockInputFormat.configure(conf, zkInstance, con, accumuloParams, columnMapper, iterators, ranges);
    // Verify that the correct methods are invoked on AccumuloInputFormat
    Mockito.verify(mockInputFormat).setZooKeeperInstance(conf, instanceName, zookeepers, false);
    Mockito.verify(mockInputFormat).setConnectorInfo(conf, USER, new PasswordToken(PASS));
    Mockito.verify(mockInputFormat).setInputTableName(conf, TEST_TABLE);
    Mockito.verify(mockInputFormat).setScanAuthorizations(conf, con.securityOperations().getUserAuthorizations(USER));
    Mockito.verify(mockInputFormat).addIterators(conf, iterators);
    Mockito.verify(mockInputFormat).setRanges(conf, ranges);
    Mockito.verify(mockInputFormat).fetchColumns(conf, cfCqPairs);
}
Also used: IteratorSetting (org.apache.accumulo.core.client.IteratorSetting), ZooKeeperInstance (org.apache.accumulo.core.client.ZooKeeperInstance), PasswordToken (org.apache.accumulo.core.client.security.tokens.PasswordToken), Range (org.apache.accumulo.core.data.Range), Pair (org.apache.accumulo.core.util.Pair), AccumuloConnectionParameters (org.apache.hadoop.hive.accumulo.AccumuloConnectionParameters), ColumnMapper (org.apache.hadoop.hive.accumulo.columns.ColumnMapper), Test (org.junit.Test)
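
The test leans on Mockito's partial-mock idiom: doCallRealMethod lets configure (and getPairCollection) run their real bodies on the mock, while every method they call internally stays stubbed, so the verify calls can assert exactly which setters the real configure invoked and with which arguments. The pattern in isolation (Configurer is a hypothetical stand-in class, ours for illustration):

import org.mockito.Mockito;

public class PartialMockSketch {
    // Hypothetical class standing in for HiveAccumuloTableInputFormat.
    public static class Configurer {
        public void configure(String table) {
            setInputTable(table); // the real body delegates to a setter
        }
        public void setInputTable(String table) {
            // real behavior is irrelevant here; on a mock, the call is recorded
        }
    }

    public static void main(String[] args) {
        Configurer mock = Mockito.mock(Configurer.class);
        // Only configure("t1") executes its real body; setInputTable stays stubbed.
        Mockito.doCallRealMethod().when(mock).configure("t1");
        mock.configure("t1");
        // The real configure called setInputTable on the mock, so verify sees it.
        Mockito.verify(mock).setInputTable("t1");
    }
}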

Aggregations

Range (org.apache.accumulo.core.data.Range): 67 usages
Test (org.junit.Test): 37 usages
Key (org.apache.accumulo.core.data.Key): 32 usages
ExprNodeDesc (org.apache.hadoop.hive.ql.plan.ExprNodeDesc): 19 usages
Text (org.apache.hadoop.io.Text): 17 usages
ExprNodeColumnDesc (org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc): 15 usages
ExprNodeConstantDesc (org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc): 15 usages
ExprNodeGenericFuncDesc (org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc): 15 usages
ArrayList (java.util.ArrayList): 14 usages
Edge (uk.gov.gchq.gaffer.data.element.Edge): 9 usages
List (java.util.List): 8 usages
IteratorSetting (org.apache.accumulo.core.client.IteratorSetting): 8 usages
ColumnMapper (org.apache.hadoop.hive.accumulo.columns.ColumnMapper): 8 usages
GenericUDFOPEqualOrGreaterThan (org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqualOrGreaterThan): 8 usages
PasswordToken (org.apache.accumulo.core.client.security.tokens.PasswordToken): 7 usages
GenericUDFOPAnd (org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPAnd): 7 usages
GenericUDFOPEqualOrLessThan (org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqualOrLessThan): 7 usages
HashMap (java.util.HashMap): 6 usages
AccumuloConnectionParameters (org.apache.hadoop.hive.accumulo.AccumuloConnectionParameters): 6 usages
Entity (uk.gov.gchq.gaffer.data.element.Entity): 6 usages