Search in sources :

Example 1 with BatchScanner

use of org.apache.accumulo.core.client.BatchScanner in project presto by prestodb.

the class IndexLookup method getIndexRanges.

/**
 * Scans the index table once per column constraint and returns the row ID ranges that
 * were found for every constraint (i.e. the intersection across all constraints).
 *
 * @param indexTable name of the index table to scan
 * @param constraintRanges index ranges to scan, grouped by the column constraint they belong to
 * @param rowIDRanges row ID ranges used to filter the row IDs read from the index
 * @param auths authorizations passed to each batch scanner
 * @return an immutable list of single-row ranges common to all constraints; empty when
 * there are no constraints or no row ID satisfies all of them
 * @throws TableNotFoundException if the index table does not exist
 */
private List<Range> getIndexRanges(String indexTable, Multimap<AccumuloColumnConstraint, Range> constraintRanges, Collection<Range> rowIDRanges, Authorizations auths) throws TableNotFoundException {
    // Stays null until the first constraint has been processed, so that "nothing processed yet"
    // can be told apart from "the intersection so far is empty"
    Set<Range> finalRanges = null;
    // For each column/constraint pair
    for (Entry<AccumuloColumnConstraint, Collection<Range>> constraintEntry : constraintRanges.asMap().entrySet()) {
        Set<Range> columnRanges = new HashSet<>();
        // Create a batch scanner against the index table
        BatchScanner scanner = connector.createBatchScanner(indexTable, auths, 10);
        try {
            scanner.setRanges(constraintEntry.getValue());
            // Fetch the column family for this specific column
            Text family = new Text(Indexer.getIndexColumnFamily(constraintEntry.getKey().getFamily().getBytes(UTF_8), constraintEntry.getKey().getQualifier().getBytes(UTF_8)).array());
            scanner.fetchColumnFamily(family);
            // Reused buffer: getColumnQualifier(Text) overwrites it for every entry
            Text tmpQualifier = new Text();
            for (Entry<Key, Value> entry : scanner) {
                entry.getKey().getColumnQualifier(tmpQualifier);
                // Add to our column ranges if it is in one of the row ID ranges
                if (inRange(tmpQualifier, rowIDRanges)) {
                    columnRanges.add(new Range(tmpQualifier));
                }
            }
        } finally {
            // Always release the scanner, even if configuring or iterating it fails
            scanner.close();
        }
        LOG.debug("Retrieved %d ranges for column %s", columnRanges.size(), constraintEntry.getKey().getName());
        if (finalRanges == null) {
            // First constraint: seed the result with its ranges
            finalRanges = new HashSet<>(columnRanges);
        } else {
            // Subsequent constraints: keep only row IDs seen for every constraint so far
            finalRanges.retainAll(columnRanges);
        }
    }
    // Return the final ranges for all constraint pairs
    if (finalRanges != null) {
        return ImmutableList.copyOf(finalRanges);
    } else {
        return ImmutableList.of();
    }
}
Also used : AccumuloColumnConstraint(com.facebook.presto.accumulo.model.AccumuloColumnConstraint) BatchScanner(org.apache.accumulo.core.client.BatchScanner) Value(org.apache.accumulo.core.data.Value) Collection(java.util.Collection) Text(org.apache.hadoop.io.Text) Range(org.apache.accumulo.core.data.Range) Key(org.apache.accumulo.core.data.Key) HashSet(java.util.HashSet)

Example 2 with BatchScanner

use of org.apache.accumulo.core.client.BatchScanner in project Gaffer by gchq.

the class RowIdAggregatorTest method testAggregatingMultiplePropertySetsAcrossRowIDRange.

/**
 * Writes edges from two groups to the store's table, scans two row ID ranges with the
 * row ID aggregator iterator configured for the "BasicEdge2" group, and checks that exactly
 * one aggregated edge is returned for each range.
 *
 * @param store the Accumulo store whose table is created, written to and scanned
 * @param elementConverter converter used to build keys/values and to read elements back
 * @throws StoreException declared by the store calls used here
 * @throws AccumuloElementConversionException declared by the element converter calls used here
 * @throws RangeFactoryException declared by the range factory calls used here
 */
private void testAggregatingMultiplePropertySetsAcrossRowIDRange(final AccumuloStore store, final AccumuloElementConverter elementConverter) throws StoreException, AccumuloElementConversionException, RangeFactoryException {
    String visibilityString = "public";
    try {
        // Create table
        // (this method creates the table, removes the versioning iterator, and adds the SetOfStatisticsCombiner iterator).
        TableUtils.createTable(store);
        final Properties properties1 = new Properties();
        properties1.put(AccumuloPropertyNames.COUNT, 1);
        final Properties properties2 = new Properties();
        properties2.put(AccumuloPropertyNames.COUNT, 1);
        final Properties properties3 = new Properties();
        properties3.put(AccumuloPropertyNames.COUNT, 2);
        // Create edges in the TestGroups.EDGE group
        final Edge edge = new Edge(TestGroups.EDGE);
        edge.setSource("2");
        edge.setDestination("1");
        edge.setDirected(true);
        edge.putProperty(AccumuloPropertyNames.COLUMN_QUALIFIER, 1);
        edge.putProperty(AccumuloPropertyNames.PROP_1, 0);
        edge.putProperty(AccumuloPropertyNames.PROP_2, 0);
        edge.putProperty(AccumuloPropertyNames.PROP_3, 0);
        edge.putProperty(AccumuloPropertyNames.PROP_4, 0);
        final Edge edge2 = new Edge(TestGroups.EDGE);
        edge2.setSource("B");
        edge2.setDestination("Z");
        edge2.setDirected(true);
        edge2.putProperty(AccumuloPropertyNames.COLUMN_QUALIFIER, 1);
        edge2.putProperty(AccumuloPropertyNames.PROP_1, 1);
        edge2.putProperty(AccumuloPropertyNames.PROP_2, 1);
        edge2.putProperty(AccumuloPropertyNames.PROP_3, 1);
        edge2.putProperty(AccumuloPropertyNames.PROP_4, 1);
        final Edge edge3 = new Edge(TestGroups.EDGE);
        edge3.setSource("3");
        edge3.setDestination("8");
        edge3.setDirected(true);
        edge3.putProperty(AccumuloPropertyNames.COLUMN_QUALIFIER, 1);
        edge3.putProperty(AccumuloPropertyNames.PROP_1, 0);
        edge3.putProperty(AccumuloPropertyNames.PROP_2, 0);
        edge3.putProperty(AccumuloPropertyNames.PROP_3, 0);
        edge3.putProperty(AccumuloPropertyNames.PROP_4, 0);
        // Create edges in the "BasicEdge2" group (the group the aggregator iterator is set up for below)
        final Edge edge6 = new Edge("BasicEdge2");
        edge6.setSource("1");
        edge6.setDestination("5");
        edge6.setDirected(true);
        edge6.putProperty(AccumuloPropertyNames.COLUMN_QUALIFIER, 2);
        edge6.putProperty(AccumuloPropertyNames.PROP_1, 0);
        edge6.putProperty(AccumuloPropertyNames.PROP_2, 0);
        edge6.putProperty(AccumuloPropertyNames.PROP_3, 0);
        edge6.putProperty(AccumuloPropertyNames.PROP_4, 0);
        final Edge edge7 = new Edge("BasicEdge2");
        edge7.setSource("2");
        edge7.setDestination("6");
        edge7.setDirected(true);
        edge7.putProperty(AccumuloPropertyNames.COLUMN_QUALIFIER, 1);
        edge7.putProperty(AccumuloPropertyNames.PROP_1, 0);
        edge7.putProperty(AccumuloPropertyNames.PROP_2, 0);
        edge7.putProperty(AccumuloPropertyNames.PROP_3, 0);
        edge7.putProperty(AccumuloPropertyNames.PROP_4, 0);
        final Edge edge8 = new Edge("BasicEdge2");
        edge8.setSource("4");
        edge8.setDestination("8");
        edge8.setDirected(true);
        edge8.putProperty(AccumuloPropertyNames.COLUMN_QUALIFIER, 2);
        edge8.putProperty(AccumuloPropertyNames.PROP_1, 0);
        edge8.putProperty(AccumuloPropertyNames.PROP_2, 0);
        edge8.putProperty(AccumuloPropertyNames.PROP_3, 0);
        edge8.putProperty(AccumuloPropertyNames.PROP_4, 0);
        final Edge edge9 = new Edge("BasicEdge2");
        edge9.setSource("5");
        edge9.setDestination("9");
        edge9.setDirected(true);
        edge9.putProperty(AccumuloPropertyNames.COLUMN_QUALIFIER, 2);
        edge9.putProperty(AccumuloPropertyNames.PROP_1, 0);
        edge9.putProperty(AccumuloPropertyNames.PROP_2, 0);
        edge9.putProperty(AccumuloPropertyNames.PROP_3, 0);
        edge9.putProperty(AccumuloPropertyNames.PROP_4, 0);
        // Accumulo keys (only the first key of each pair returned by the converter is used)
        final Key key = elementConverter.getKeysFromEdge(edge).getFirst();
        final Key key2 = elementConverter.getKeysFromEdge(edge2).getFirst();
        final Key key3 = elementConverter.getKeysFromEdge(edge3).getFirst();
        final Key key4 = elementConverter.getKeysFromEdge(edge6).getFirst();
        final Key key5 = elementConverter.getKeysFromEdge(edge7).getFirst();
        final Key key6 = elementConverter.getKeysFromEdge(edge8).getFirst();
        final Key key7 = elementConverter.getKeysFromEdge(edge9).getFirst();
        // Accumulo values
        final Value value1 = elementConverter.getValueFromProperties(TestGroups.EDGE, properties1);
        final Value value2 = elementConverter.getValueFromProperties(TestGroups.EDGE, properties2);
        final Value value3 = elementConverter.getValueFromProperties(TestGroups.EDGE, properties3);
        final Value value4 = elementConverter.getValueFromProperties(TestGroups.EDGE_2, properties1);
        final Value value5 = elementConverter.getValueFromProperties(TestGroups.EDGE_2, properties2);
        // Create mutations; m1-m3 write three values to the same key
        final Mutation m1 = new Mutation(key.getRow());
        m1.put(key.getColumnFamily(), key.getColumnQualifier(), new ColumnVisibility(key.getColumnVisibility()), key.getTimestamp(), value1);
        final Mutation m2 = new Mutation(key.getRow());
        m2.put(key.getColumnFamily(), key.getColumnQualifier(), new ColumnVisibility(key.getColumnVisibility()), key.getTimestamp(), value2);
        final Mutation m3 = new Mutation(key.getRow());
        m3.put(key.getColumnFamily(), key.getColumnQualifier(), new ColumnVisibility(key.getColumnVisibility()), key.getTimestamp(), value3);
        final Mutation m4 = new Mutation(key2.getRow());
        m4.put(key2.getColumnFamily(), key2.getColumnQualifier(), new ColumnVisibility(key2.getColumnVisibility()), key2.getTimestamp(), value1);
        // NOTE(review): m5 uses key's row but key3's columns; every other mutation uses a
        // single key for both - confirm whether this is intentional before changing it.
        final Mutation m5 = new Mutation(key.getRow());
        m5.put(key3.getColumnFamily(), key3.getColumnQualifier(), new ColumnVisibility(key3.getColumnVisibility()), key3.getTimestamp(), value1);
        final Mutation m6 = new Mutation(key4.getRow());
        m6.put(key4.getColumnFamily(), key4.getColumnQualifier(), new ColumnVisibility(key4.getColumnVisibility()), key4.getTimestamp(), value4);
        final Mutation m7 = new Mutation(key5.getRow());
        m7.put(key5.getColumnFamily(), key5.getColumnQualifier(), new ColumnVisibility(key5.getColumnVisibility()), key5.getTimestamp(), value5);
        final Mutation m8 = new Mutation(key6.getRow());
        m8.put(key6.getColumnFamily(), key6.getColumnQualifier(), new ColumnVisibility(key6.getColumnVisibility()), key6.getTimestamp(), value5);
        final Mutation m9 = new Mutation(key7.getRow());
        m9.put(key7.getColumnFamily(), key7.getColumnQualifier(), new ColumnVisibility(key7.getColumnVisibility()), key7.getTimestamp(), value5);
        // Write mutations
        final BatchWriterConfig writerConfig = new BatchWriterConfig();
        writerConfig.setMaxMemory(1000000L);
        writerConfig.setMaxLatency(1000L, TimeUnit.MILLISECONDS);
        writerConfig.setMaxWriteThreads(1);
        final BatchWriter writer = store.getConnection().createBatchWriter(store.getProperties().getTable(), writerConfig);
        writer.addMutation(m1);
        writer.addMutation(m2);
        writer.addMutation(m3);
        writer.addMutation(m4);
        writer.addMutation(m5);
        writer.addMutation(m6);
        writer.addMutation(m7);
        writer.addMutation(m8);
        writer.addMutation(m9);
        writer.close();
        // Read data back and check we get one merged element per range
        final Authorizations authorizations = new Authorizations(visibilityString);
        final BatchScanner scanner = store.getConnection().createBatchScanner(store.getProperties().getTable(), authorizations, 1000);
        try {
            try {
                scanner.addScanIterator(store.getKeyPackage().getIteratorFactory().getRowIDAggregatorIteratorSetting(store, "BasicEdge2"));
            } catch (IteratorSettingException e) {
                fail(e.getMessage());
            }
            final RangeFactory rangeF = store.getKeyPackage().getRangeFactory();
            final Range r = rangeF.getRangeFromPair(new Pair<ElementSeed>((new EntitySeed("1")), new EntitySeed("4")), new SummariseGroupOverRanges());
            final Range r2 = rangeF.getRangeFromPair(new Pair<ElementSeed>((new EntitySeed("5")), new EntitySeed("5")), new SummariseGroupOverRanges());
            scanner.setRanges(Arrays.asList(r, r2));
            final Iterator<Entry<Key, Value>> it = scanner.iterator();
            // Fail with an assertion rather than a NoSuchElementException if nothing comes back
            assertTrue(it.hasNext());
            Entry<Key, Value> entry = it.next();
            Element readEdge = elementConverter.getFullElement(entry.getKey(), entry.getValue());
            Edge expectedEdge = new Edge("BasicEdge2");
            expectedEdge.setSource("4");
            expectedEdge.setDestination("8");
            expectedEdge.setDirected(true);
            expectedEdge.putProperty(AccumuloPropertyNames.COLUMN_QUALIFIER, 5);
            expectedEdge.putProperty(AccumuloPropertyNames.COUNT, 3);
            assertEquals(expectedEdge, readEdge);
            assertEquals(5, readEdge.getProperty(AccumuloPropertyNames.COLUMN_QUALIFIER));
            assertEquals(3, readEdge.getProperty(AccumuloPropertyNames.COUNT));
            // Check we get the Result of the second provided range
            assertTrue(it.hasNext());
            entry = it.next();
            readEdge = elementConverter.getFullElement(entry.getKey(), entry.getValue());
            expectedEdge = new Edge("BasicEdge2");
            expectedEdge.setSource("5");
            expectedEdge.setDestination("9");
            expectedEdge.setDirected(true);
            expectedEdge.putProperty(AccumuloPropertyNames.COLUMN_QUALIFIER, 2);
            expectedEdge.putProperty(AccumuloPropertyNames.COUNT, 1);
            assertEquals(expectedEdge, readEdge);
            //Check no additional rows are found. (For a table of this size we shouldn't see this)
            if (it.hasNext()) {
                fail("Additional row found.");
            }
        } finally {
            // Release the scanner whether the assertions pass or fail
            scanner.close();
        }
    } catch (AccumuloException | TableExistsException | TableNotFoundException e) {
        fail(this.getClass().getSimpleName() + " failed with exception: " + e);
    }
}
Also used : Element(uk.gov.gchq.gaffer.data.element.Element) BatchScanner(org.apache.accumulo.core.client.BatchScanner) Properties(uk.gov.gchq.gaffer.data.element.Properties) AccumuloProperties(uk.gov.gchq.gaffer.accumulostore.AccumuloProperties) RangeFactory(uk.gov.gchq.gaffer.accumulostore.key.RangeFactory) TableNotFoundException(org.apache.accumulo.core.client.TableNotFoundException) Entry(java.util.Map.Entry) SummariseGroupOverRanges(uk.gov.gchq.gaffer.accumulostore.operation.impl.SummariseGroupOverRanges) BatchWriterConfig(org.apache.accumulo.core.client.BatchWriterConfig) ColumnVisibility(org.apache.accumulo.core.security.ColumnVisibility) AccumuloException(org.apache.accumulo.core.client.AccumuloException) Authorizations(org.apache.accumulo.core.security.Authorizations) IteratorSettingException(uk.gov.gchq.gaffer.accumulostore.key.exception.IteratorSettingException) Range(org.apache.accumulo.core.data.Range) Value(org.apache.accumulo.core.data.Value) EntitySeed(uk.gov.gchq.gaffer.operation.data.EntitySeed) TableExistsException(org.apache.accumulo.core.client.TableExistsException) ElementSeed(uk.gov.gchq.gaffer.operation.data.ElementSeed) Mutation(org.apache.accumulo.core.data.Mutation) BatchWriter(org.apache.accumulo.core.client.BatchWriter) Edge(uk.gov.gchq.gaffer.data.element.Edge) Key(org.apache.accumulo.core.data.Key)

Example 3 with BatchScanner

use of org.apache.accumulo.core.client.BatchScanner in project Gaffer by gchq.

the class AccumuloRetriever method getScanner.

/**
 * Creates a scanner to be used in your query.
 * <p>
 * The scanner is configured with any non-null iterator settings held by this retriever,
 * the supplied ranges, and one fetched column family per edge/entity group in the
 * operation's view. If configuration fails, the scanner is closed before the exception
 * propagates; on success the caller owns the returned scanner and must close it.
 *
 * @param ranges the ranges to get the scanner for
 * @return A {@link org.apache.accumulo.core.client.BatchScanner} for the
 * table specified in the properties with the ranges provided.
 * @throws TableNotFoundException if an accumulo table could not be found
 * @throws StoreException         if a connection to accumulo could not be created.
 */
protected BatchScanner getScanner(final Set<Range> ranges) throws TableNotFoundException, StoreException {
    final BatchScanner scanner = store.getConnection().createBatchScanner(store.getProperties().getTable(), authorisations, store.getProperties().getThreadsForBatchScanner());
    boolean configured = false;
    try {
        if (iteratorSettings != null) {
            for (final IteratorSetting iteratorSetting : iteratorSettings) {
                if (iteratorSetting != null) {
                    scanner.addScanIterator(iteratorSetting);
                }
            }
        }
        scanner.setRanges(ranges);
        // Currently hard links element class to column family position.
        if (IncludeEdgeType.NONE != operation.getIncludeEdges()) {
            for (final String col : operation.getView().getEdgeGroups()) {
                scanner.fetchColumnFamily(new Text(col));
            }
        }
        if (operation.isIncludeEntities()) {
            for (final String col : operation.getView().getEntityGroups()) {
                scanner.fetchColumnFamily(new Text(col));
            }
        }
        configured = true;
        return scanner;
    } finally {
        // The caller never receives the scanner if configuration threw, so release it here
        if (!configured) {
            scanner.close();
        }
    }
}
Also used : IteratorSetting(org.apache.accumulo.core.client.IteratorSetting) BatchScanner(org.apache.accumulo.core.client.BatchScanner) Text(org.apache.hadoop.io.Text)

Aggregations

BatchScanner (org.apache.accumulo.core.client.BatchScanner)3 Key (org.apache.accumulo.core.data.Key)2 Range (org.apache.accumulo.core.data.Range)2 Value (org.apache.accumulo.core.data.Value)2 Text (org.apache.hadoop.io.Text)2 AccumuloColumnConstraint (com.facebook.presto.accumulo.model.AccumuloColumnConstraint)1 Collection (java.util.Collection)1 HashSet (java.util.HashSet)1 Entry (java.util.Map.Entry)1 AccumuloException (org.apache.accumulo.core.client.AccumuloException)1 BatchWriter (org.apache.accumulo.core.client.BatchWriter)1 BatchWriterConfig (org.apache.accumulo.core.client.BatchWriterConfig)1 IteratorSetting (org.apache.accumulo.core.client.IteratorSetting)1 TableExistsException (org.apache.accumulo.core.client.TableExistsException)1 TableNotFoundException (org.apache.accumulo.core.client.TableNotFoundException)1 Mutation (org.apache.accumulo.core.data.Mutation)1 Authorizations (org.apache.accumulo.core.security.Authorizations)1 ColumnVisibility (org.apache.accumulo.core.security.ColumnVisibility)1 AccumuloProperties (uk.gov.gchq.gaffer.accumulostore.AccumuloProperties)1 RangeFactory (uk.gov.gchq.gaffer.accumulostore.key.RangeFactory)1