Search in sources :

Example 1 with GetElementsBetweenSets

use of uk.gov.gchq.gaffer.accumulostore.operation.impl.GetElementsBetweenSets in project Gaffer by gchq.

the class GetElementsBetweenSetsHandlerTest method shouldReturnSummarisedElements.

/**
 * Runs a {@link GetElementsBetweenSets} operation between {@code seedsA} and {@code seedsB} using a view
 * that applies an empty group-by to both the entity and the edge group, then checks that exactly two
 * elements are returned and that they include the expected summarised edge and entity.
 *
 * @param store the Accumulo store the handler executes the operation against
 * @throws OperationException if thrown while the handler executes the operation
 */
private void shouldReturnSummarisedElements(final AccumuloStore store) throws OperationException {
    // Start from the default view and override the group-by of both groups with an empty one.
    final View opView = new View.Builder(defaultView)
            .entity(TestGroups.ENTITY, new ViewElementDefinition.Builder().groupBy().build())
            .edge(TestGroups.EDGE, new ViewElementDefinition.Builder().groupBy().build())
            .build();
    final GetElementsBetweenSets<Element> op = new GetElementsBetweenSets<>(seedsA, seedsB, opView);
    final GetElementsBetweenSetsHandler handler = new GetElementsBetweenSetsHandler();
    final CloseableIterable<Element> elements = handler.doOperation(op, user, store);
    try {
        // With query compaction the result size should be 2
        assertEquals(2, Iterables.size(elements));
        assertThat(elements, IsCollectionContaining.hasItems(expectedSummarisedEdge, expectedEntity1));
    } finally {
        // Release the iterable even when one of the assertions above throws.
        elements.close();
    }
}
Also used : GetElementsBetweenSets(uk.gov.gchq.gaffer.accumulostore.operation.impl.GetElementsBetweenSets) Element(uk.gov.gchq.gaffer.data.element.Element) ViewElementDefinition(uk.gov.gchq.gaffer.data.elementdefinition.view.ViewElementDefinition) View(uk.gov.gchq.gaffer.data.elementdefinition.view.View)

Example 2 with GetElementsBetweenSets

use of uk.gov.gchq.gaffer.accumulostore.operation.impl.GetElementsBetweenSets in project Gaffer by gchq.

the class AccumuloIDBetweenSetsRetrieverTest method shouldDealWithDirectedEdgesOnlyOption.

/**
 * Loads {@code EDGE_A_B_1} and {@code EDGE_A_B_2} into the store and queries between the A and B seed sets
 * three times with the same operation, switching the include-edges option between
 * {@code UNDIRECTED}, {@code DIRECTED} and {@code ALL}. Entities are excluded from every query.
 *
 * @param loadIntoMemory passed through to {@code returnElementsFromOperation}
 * @param store          the Accumulo store to load the test edges into and query
 */
private void shouldDealWithDirectedEdgesOnlyOption(boolean loadIntoMemory, AccumuloStore store) {
    try {
        final Set<Element> data = new HashSet<>();
        data.add(AccumuloTestData.EDGE_A_B_1);
        data.add(AccumuloTestData.EDGE_A_B_2);
        addElements(data, store, new User());
        // Set undirected edges only option, and query for edges between {A} and {B} - should get EDGE_A_B_2
        final AbstractAccumuloTwoSetSeededOperation<EntitySeed, Element> op = new GetElementsBetweenSets<>(AccumuloTestData.SEED_A_SET, AccumuloTestData.SEED_B_SET, defaultView);
        op.setIncludeEdges(IncludeEdgeType.UNDIRECTED);
        op.setIncludeEntities(false);
        final Set<Element> results = returnElementsFromOperation(store, op, new User(), loadIntoMemory);
        assertThat(results, IsCollectionContaining.hasItem(AccumuloTestData.EDGE_A_B_2));
        // Set directed edges only option on the same operation - should get EDGE_A_B_1
        op.setIncludeEdges(IncludeEdgeType.DIRECTED);
        final Set<Element> secondResults = returnElementsFromOperation(store, op, new User(), loadIntoMemory);
        assertThat(secondResults, IsCollectionContaining.hasItem(AccumuloTestData.EDGE_A_B_1));
        // Turn off directed / undirected edges only option and check we get both EDGE_A_B_1 and EDGE_A_B_2
        op.setIncludeEdges(IncludeEdgeType.ALL);
        final Set<Element> thirdResults = returnElementsFromOperation(store, op, new User(), loadIntoMemory);
        assertThat(thirdResults, IsCollectionContaining.hasItems(AccumuloTestData.EDGE_A_B_1, AccumuloTestData.EDGE_A_B_2));
    } catch (StoreException e) {
        // Keep the original failure message but attach the cause so the stack trace is not lost.
        throw new AssertionError("Failed to set up graph in Accumulo with exception: " + e, e);
    }
}
Also used : User(uk.gov.gchq.gaffer.user.User) GetElementsBetweenSets(uk.gov.gchq.gaffer.accumulostore.operation.impl.GetElementsBetweenSets) Element(uk.gov.gchq.gaffer.data.element.Element) EntitySeed(uk.gov.gchq.gaffer.operation.data.EntitySeed) HashSet(java.util.HashSet) StoreException(uk.gov.gchq.gaffer.store.StoreException)

Example 3 with GetElementsBetweenSets

use of uk.gov.gchq.gaffer.accumulostore.operation.impl.GetElementsBetweenSets in project Gaffer by gchq.

the class AccumuloIDBetweenSetsRetrieverTest method shouldDealWithFalsePositives.

/**
 * Builds a Bloom filter over a set of seeds, searches for a numeric string that the filter reports as a
 * member even though it was never added (a false positive), stores an edge from {@code "A0"} to that
 * value, and then checks that querying between {@code SEED_A0_SET} and the seeds does not return it.
 *
 * @param loadIntoMemory passed through to {@code returnElementsFromOperation}; also selects whether the
 *                       filter is sized for the actual number of seeds or for a fixed batch size of 20
 * @param store          the Accumulo store to add the edge to and query
 * @throws StoreException                     declared for the store helper calls
 * @throws AccumuloElementConversionException declared for the vertex serialisation call
 */
private void shouldDealWithFalsePositives(final boolean loadIntoMemory, final AccumuloStore store) throws StoreException, AccumuloElementConversionException {
    final Set<EntitySeed> seeds = new HashSet<>();
    seeds.add(AccumuloTestData.SEED_A0);
    seeds.add(AccumuloTestData.SEED_A23);
    // Add some extra seeds so that the probability of being able to find a false
    // positive is sensible.
    for (int i = 0; i < 10; i++) {
        seeds.add(new EntitySeed("abc" + i));
    }
    // Need to make sure that the Bloom filter we create has the same size and the same number of hashes as the
    // one that GraphElementsWithStatisticsWithinSetRetriever creates.
    final int numItemsToBeAdded = loadIntoMemory ? seeds.size() : 20;
    if (!loadIntoMemory) {
        store.getProperties().setMaxEntriesForBatchScanner("20");
    }
    // Find something that will give a false positive
    // Need to repeat the logic used in the getGraphElementsWithStatisticsWithinSet() method.
    // Calculate sensible size of filter, aiming for false positive rate of 1 in 10000, with a maximum size of
    // maxBloomFilterToPassToAnIterator bytes.
    int size = (int) (-numItemsToBeAdded * Math.log(0.0001) / (Math.pow(Math.log(2.0), 2.0)));
    size = Math.min(size, store.getProperties().getMaxBloomFilterToPassToAnIterator());
    // Work out optimal number of hashes to use in Bloom filter based on size of set - optimal number of hashes is
    // (m/n)ln 2 where m is the size of the filter in bits and n is the number of items that will be added to the set.
    // NOTE(review): size / numItemsToBeAdded is an integer division; left as-is because this calculation is
    // meant to mirror the retriever's - confirm against the retriever before changing it.
    final int numHashes = Math.max(1, (int) ((size / numItemsToBeAdded) * Math.log(2)));
    // Create Bloom filter and add seeds to it
    final BloomFilter filter = new BloomFilter(size, numHashes, Hash.MURMUR_HASH);
    for (final EntitySeed seed : seeds) {
        filter.add(new Key(store.getKeyPackage().getKeyConverter().serialiseVertex(seed.getVertex())));
    }
    // Test random items against it - should only have to test MAX_SIZE_BLOOM_FILTER / 2 on average before we find a
    // false positive (but impose an arbitrary limit to avoid an infinite loop if there's a problem).
    int count = 0;
    // Tracked separately from count so that a hit on the very last permitted try is not mistaken for a miss.
    boolean foundFalsePositive = false;
    final int maxNumberOfTries = 50 * store.getProperties().getMaxBloomFilterToPassToAnIterator();
    while (count < maxNumberOfTries) {
        count++;
        if (filter.membershipTest(new Key(("" + count).getBytes()))) {
            foundFalsePositive = true;
            break;
        }
    }
    if (!foundFalsePositive) {
        fail("Didn't find a false positive");
    }
    // False positive is "" + count so create an edge from seeds to that
    final Edge edge = new Edge(TestGroups.EDGE, "A0", "" + count, true);
    edge.putProperty(AccumuloPropertyNames.COUNT, 1000000);
    final Set<Element> data = new HashSet<>();
    data.add(edge);
    final User user = new User();
    addElements(data, store, user);
    // Now query for all edges in set - shouldn't get the false positive
    final AbstractAccumuloTwoSetSeededOperation<EntitySeed, Element> op = new GetElementsBetweenSets<>(AccumuloTestData.SEED_A0_SET, seeds, defaultView);
    final Set<Element> results = returnElementsFromOperation(store, op, new User(), loadIntoMemory);
    // Check results are as expected
    assertEquals(2, results.size());
    assertThat(results, IsCollectionContaining.hasItems(AccumuloTestData.EDGE_A0_A23, AccumuloTestData.A0_ENTITY));
}
Also used : User(uk.gov.gchq.gaffer.user.User) GetElementsBetweenSets(uk.gov.gchq.gaffer.accumulostore.operation.impl.GetElementsBetweenSets) Element(uk.gov.gchq.gaffer.data.element.Element) EntitySeed(uk.gov.gchq.gaffer.operation.data.EntitySeed) Edge(uk.gov.gchq.gaffer.data.element.Edge) BloomFilter(org.apache.hadoop.util.bloom.BloomFilter) Key(org.apache.hadoop.util.bloom.Key) HashSet(java.util.HashSet)

Example 4 with GetElementsBetweenSets

use of uk.gov.gchq.gaffer.accumulostore.operation.impl.GetElementsBetweenSets in project Gaffer by gchq.

the class AccumuloIDBetweenSetsRetrieverTest method testEdgesWithinSetAAreNotReturned.

/**
 * Queries between the seed set {@code SEED_A0_A23_SET} and {@code SEED_B_SET} and checks that exactly two
 * elements come back, including the A0 and A23 entities.
 *
 * @param loadIntoMemory passed through to {@code returnElementsFromOperation}
 * @param store          the Accumulo store to query
 * @throws StoreException declared for the {@code returnElementsFromOperation} call
 */
private void testEdgesWithinSetAAreNotReturned(final boolean loadIntoMemory, final AccumuloStore store) throws StoreException {
    final AbstractAccumuloTwoSetSeededOperation<EntitySeed, Element> betweenSetsOp =
            new GetElementsBetweenSets<>(
                    AccumuloTestData.SEED_A0_A23_SET,
                    AccumuloTestData.SEED_B_SET,
                    defaultView);
    final User queryUser = new User();
    final Set<Element> found = returnElementsFromOperation(store, betweenSetsOp, queryUser, loadIntoMemory);

    // Only the two entities A0 and A23 are expected; the A0-A23 edge lies inside the first set.
    final int expectedCount = 2;
    assertEquals(expectedCount, found.size());
    assertThat(
            found,
            IsCollectionContaining.hasItems(AccumuloTestData.A0_ENTITY, AccumuloTestData.A23_ENTITY));
}
Also used : User(uk.gov.gchq.gaffer.user.User) GetElementsBetweenSets(uk.gov.gchq.gaffer.accumulostore.operation.impl.GetElementsBetweenSets) Element(uk.gov.gchq.gaffer.data.element.Element) EntitySeed(uk.gov.gchq.gaffer.operation.data.EntitySeed)

Example 5 with GetElementsBetweenSets

use of uk.gov.gchq.gaffer.accumulostore.operation.impl.GetElementsBetweenSets in project Gaffer by gchq.

the class AccumuloIDBetweenSetsRetrieverTest method shouldLoadElementsWhenMoreElementsThanFitInBatchScanner.

/**
 * Sets the store's max-entries-for-batch-scanner property to {@code "1"} and then runs three
 * between-sets queries, checking the size and expected contents of each result set.
 *
 * @param loadIntoMemory passed through to {@code returnElementsFromOperation}
 * @param store          the Accumulo store whose properties are changed and which is queried
 * @throws StoreException declared for the {@code returnElementsFromOperation} calls
 */
private void shouldLoadElementsWhenMoreElementsThanFitInBatchScanner(final boolean loadIntoMemory, final AccumuloStore store) throws StoreException {
    store.getProperties().setMaxEntriesForBatchScanner("1");

    // Case 1: {A0} against {A23} - expect the A0-A23 edge plus the A0 entity.
    final AbstractAccumuloTwoSetSeededOperation<EntitySeed, Element> a0ToA23Op =
            new GetElementsBetweenSets<>(AccumuloTestData.SEED_A0_SET, AccumuloTestData.SEED_A23_SET, defaultView);
    final Set<Element> a0ToA23 = returnElementsFromOperation(store, a0ToA23Op, new User(), loadIntoMemory);
    assertEquals(2, a0ToA23.size());
    assertThat(
            a0ToA23,
            IsCollectionContaining.hasItems(AccumuloTestData.EDGE_A0_A23, AccumuloTestData.A0_ENTITY));

    // Case 2: {A1} against {notpresent} - no edges expected, only the A1 entity.
    final AbstractAccumuloTwoSetSeededOperation<EntitySeed, Element> a1ToMissingOp =
            new GetElementsBetweenSets<>(AccumuloTestData.SEED_A1_SET, AccumuloTestData.NOT_PRESENT_ENTITY_SEED_SET, defaultView);
    final Set<Element> a1ToMissing = returnElementsFromOperation(store, a1ToMissingOp, new User(), loadIntoMemory);
    assertEquals(1, a1ToMissing.size());
    assertThat(a1ToMissing, IsCollectionContaining.hasItem(AccumuloTestData.A1_ENTITY));

    // Case 3: {A1} against {A2} - again no edges expected, only the A1 entity.
    final AbstractAccumuloTwoSetSeededOperation<EntitySeed, Element> a1ToA2Op =
            new GetElementsBetweenSets<>(AccumuloTestData.SEED_A1_SET, AccumuloTestData.SEED_A2_SET, defaultView);
    final Set<Element> a1ToA2 = returnElementsFromOperation(store, a1ToA2Op, new User(), loadIntoMemory);
    assertEquals(1, a1ToA2.size());
    assertThat(a1ToA2, IsCollectionContaining.hasItem(AccumuloTestData.A1_ENTITY));
}
Also used : User(uk.gov.gchq.gaffer.user.User) GetElementsBetweenSets(uk.gov.gchq.gaffer.accumulostore.operation.impl.GetElementsBetweenSets) Element(uk.gov.gchq.gaffer.data.element.Element) EntitySeed(uk.gov.gchq.gaffer.operation.data.EntitySeed)

Aggregations

GetElementsBetweenSets (uk.gov.gchq.gaffer.accumulostore.operation.impl.GetElementsBetweenSets)11 Element (uk.gov.gchq.gaffer.data.element.Element)11 EntitySeed (uk.gov.gchq.gaffer.operation.data.EntitySeed)7 User (uk.gov.gchq.gaffer.user.User)7 View (uk.gov.gchq.gaffer.data.elementdefinition.view.View)5 ViewElementDefinition (uk.gov.gchq.gaffer.data.elementdefinition.view.ViewElementDefinition)4 HashSet (java.util.HashSet)3 StoreException (uk.gov.gchq.gaffer.store.StoreException)2 BloomFilter (org.apache.hadoop.util.bloom.BloomFilter)1 Key (org.apache.hadoop.util.bloom.Key)1 Edge (uk.gov.gchq.gaffer.data.element.Edge)1