Search in sources :

Example 21 with GetElements

use of uk.gov.gchq.gaffer.operation.impl.get.GetElements in project Gaffer by gchq.

the class AccumuloIDWithinSetRetrieverTest method shouldDealWithFalsePositives.

/**
 * Builds a Bloom filter over the query seeds, searches for a key that the filter wrongly reports as present,
 * and then checks that a query for the elements within the seed set returns the expected elements.
 *
 * @param loadIntoMemory passed through to {@code returnElementsFromOperation}; also decides how many items the
 *                       locally built Bloom filter is sized for
 * @param store          the Accumulo store to query
 * @throws StoreException                      if the store operation fails
 * @throws AccumuloElementConversionException if a seed vertex cannot be serialised
 */
private void shouldDealWithFalsePositives(final boolean loadIntoMemory, final AccumuloStore store) throws StoreException, AccumuloElementConversionException {
    // Query for all edges in set {A0, A23}
    final Set<EntitySeed> seeds = new HashSet<>();
    seeds.add(AccumuloTestData.SEED_A0);
    seeds.add(AccumuloTestData.SEED_A23);
    // Add a handful of extra seeds to the filter - presumably these are not in the test data and are only
    // there to make the probability of finding a false positive sensible (TODO confirm).
    for (int i = 0; i < 10; i++) {
        seeds.add(new EntitySeed("abc" + i));
    }
    // Need to make sure that the Bloom filter we create has the same size and the same number of hashes as the
    // one that GraphElementsWithStatisticsWithinSetRetriever creates.
    final int numItemsToBeAdded = loadIntoMemory ? seeds.size() : 20;
    if (!loadIntoMemory) {
        store.getProperties().setMaxEntriesForBatchScanner("20");
    }
    // Find something that will give a false positive
    // Need to repeat the logic used in the getGraphElementsWithStatisticsWithinSet() method.
    // Calculate sensible size of filter, aiming for false positive rate of 1 in 10000, with a maximum size of
    // maxBloomFilterToPassToAnIterator bytes.
    int size = (int) (-numItemsToBeAdded * Math.log(0.0001) / (Math.pow(Math.log(2.0), 2.0)));
    size = Math.min(size, store.getProperties().getMaxBloomFilterToPassToAnIterator());
    // Work out optimal number of hashes to use in Bloom filter based on size of set - optimal number of hashes is
    // (m/n)ln 2 where m is the size of the filter in bits and n is the number of items that will be added to the set.
    // NOTE(review): size / numItemsToBeAdded is an integer division; it is deliberately left untouched because
    // this computation has to match the retriever's own - confirm against the retriever before changing it.
    final int numHashes = Math.max(1, (int) ((size / numItemsToBeAdded) * Math.log(2)));
    // Create Bloom filter and add seeds to it
    final BloomFilter filter = new BloomFilter(size, numHashes, Hash.MURMUR_HASH);
    for (final EntitySeed seed : seeds) {
        filter.add(new Key(store.getKeyPackage().getKeyConverter().serialiseVertex(seed.getVertex())));
    }
    // Test candidate items ("1", "2", ...) against it - should only have to test MAX_SIZE_BLOOM_FILTER / 2 on
    // average before finding a false positive (but impose an arbitrary limit to avoid an infinite loop if
    // there's a problem).
    final int maxNumberOfTries = 50 * store.getProperties().getMaxBloomFilterToPassToAnIterator();
    // Track success explicitly: comparing count with the limit cannot tell "found on the last try" apart from
    // "ran out of tries".
    boolean foundFalsePositive = false;
    int count = 0;
    while (count < maxNumberOfTries && !foundFalsePositive) {
        count++;
        foundFalsePositive = filter.membershipTest(new Key(("" + count).getBytes()));
    }
    if (!foundFalsePositive) {
        fail("Didn't find a false positive");
    }
    // The false positive is "" + count.
    // NOTE(review): the located false positive is not used by the query below - no edge to it is created here.
    final GetElements<EntitySeed, ?> op = new GetElements<>(defaultView, seeds);
    // Now query for all edges in set - shouldn't get the false positive
    final Set<Element> results = returnElementsFromOperation(store, op, new User(), loadIntoMemory);
    // Check results are as expected
    assertThat(results, IsCollectionContaining.hasItems(AccumuloTestData.EDGE_A0_A23, AccumuloTestData.A0_ENTITY, AccumuloTestData.A23_ENTITY));
}
Also used : User(uk.gov.gchq.gaffer.user.User) Element(uk.gov.gchq.gaffer.data.element.Element) EntitySeed(uk.gov.gchq.gaffer.operation.data.EntitySeed) GetElements(uk.gov.gchq.gaffer.operation.impl.get.GetElements) BloomFilter(org.apache.hadoop.util.bloom.BloomFilter) Key(org.apache.hadoop.util.bloom.Key) HashSet(java.util.HashSet)

Example 22 with GetElements

use of uk.gov.gchq.gaffer.operation.impl.get.GetElements in project Gaffer by gchq.

the class AccumuloIDWithinSetRetrieverTest method shouldLoadElementsWhenMoreElementsThanFitInBatchScanner.

/**
 * Runs three within-set queries with the batch scanner limited to a single entry and checks the
 * elements returned by each.
 *
 * @param loadIntoMemory passed through to {@code returnElementsFromOperation}
 * @param store          the Accumulo store to query
 * @throws StoreException if a store operation fails
 */
private void shouldLoadElementsWhenMoreElementsThanFitInBatchScanner(final boolean loadIntoMemory, final AccumuloStore store) throws StoreException {
    // Restrict the batch scanner to one entry so that each query holds more seeds than fit in a batch.
    store.getProperties().setMaxEntriesForBatchScanner("1");

    // Set {A0, A23}: expect the edge between them plus both entities.
    final Set<EntitySeed> a0A23Seeds = new HashSet<>();
    a0A23Seeds.add(AccumuloTestData.SEED_A0);
    a0A23Seeds.add(AccumuloTestData.SEED_A23);
    final GetElements<EntitySeed, ?> a0A23Query = new GetElements<>(defaultView, a0A23Seeds);
    final Set<Element> a0A23Elements = returnElementsFromOperation(store, a0A23Query, new User(), loadIntoMemory);
    assertThat(a0A23Elements, IsCollectionContaining.hasItems(AccumuloTestData.EDGE_A0_A23, AccumuloTestData.A0_ENTITY, AccumuloTestData.A23_ENTITY));

    // Set {A1}: no edges expected, only the A1 entity.
    final GetElements<EntitySeed, ?> a1Query = new GetElements<>(defaultView, AccumuloTestData.SEED_A1_SET);
    final Set<Element> a1Elements = returnElementsFromOperation(store, a1Query, new User(), loadIntoMemory);
    assertEquals(1, a1Elements.size());
    assertThat(a1Elements, IsCollectionContaining.hasItem(AccumuloTestData.A1_ENTITY));

    // Set {A1, A2}: no edges expected, only the two entities.
    final Set<EntitySeed> a1A2SeedSet = new HashSet<>();
    a1A2SeedSet.add(AccumuloTestData.SEED_A1);
    a1A2SeedSet.add(AccumuloTestData.SEED_A2);
    final GetElements<EntitySeed, ?> a1A2Query = new GetElements<>(defaultView, a1A2SeedSet);
    final Set<Element> a1A2Elements = returnElementsFromOperation(store, a1A2Query, new User(), loadIntoMemory);
    assertEquals(2, a1A2Elements.size());
    assertThat(a1A2Elements, IsCollectionContaining.hasItems(AccumuloTestData.A1_ENTITY, AccumuloTestData.A2_ENTITY));
}
Also used : User(uk.gov.gchq.gaffer.user.User) Element(uk.gov.gchq.gaffer.data.element.Element) EntitySeed(uk.gov.gchq.gaffer.operation.data.EntitySeed) GetElements(uk.gov.gchq.gaffer.operation.impl.get.GetElements) HashSet(java.util.HashSet)

Example 23 with GetElements

use of uk.gov.gchq.gaffer.operation.impl.get.GetElements in project Gaffer by gchq.

the class AccumuloIDWithinSetRetrieverTest method shouldDealWithOutgoingEdgesOnlyOption.

/**
 * Queries for the set {C, D} first with the OUTGOING option and then with the INCOMING option, asserting
 * that both queries return the directed and undirected C-D edges.
 *
 * @param store the Accumulo store to query
 */
private void shouldDealWithOutgoingEdgesOnlyOption(final AccumuloStore store) {
    try {
        // Seeds for the set {C,D}.
        final Set<EntitySeed> seedsCAndD = new HashSet<>();
        seedsCAndD.add(new EntitySeed("C"));
        seedsCAndD.add(new EntitySeed("D"));

        // Both directions are expected to yield exactly these two edges.
        final Set<Element> expectedEdges = new HashSet<>();
        expectedEdges.add(AccumuloTestData.EDGE_C_D_DIRECTED);
        expectedEdges.add(AccumuloTestData.EDGE_C_D_UNDIRECTED);

        final GetElements<EntitySeed, ?> query = new GetElements<>(defaultView, seedsCAndD);

        // Outgoing edges only.
        query.setIncludeIncomingOutGoing(GetOperation.IncludeIncomingOutgoingType.OUTGOING);
        assertEquals(expectedEdges, returnElementsFromOperation(store, query, new User(), true));

        // Incoming edges only, reusing the same operation.
        query.setIncludeIncomingOutGoing(GetOperation.IncludeIncomingOutgoingType.INCOMING);
        assertEquals(expectedEdges, returnElementsFromOperation(store, query, new User(), false));
    } catch (StoreException e) {
        fail("Failed to set up graph in Accumulo with exception: " + e);
    }
}
Also used : User(uk.gov.gchq.gaffer.user.User) Element(uk.gov.gchq.gaffer.data.element.Element) EntitySeed(uk.gov.gchq.gaffer.operation.data.EntitySeed) GetElements(uk.gov.gchq.gaffer.operation.impl.get.GetElements) HashSet(java.util.HashSet) StoreException(uk.gov.gchq.gaffer.store.StoreException)

Example 24 with GetElements

use of uk.gov.gchq.gaffer.operation.impl.get.GetElements in project Gaffer by gchq.

the class AccumuloSingleIDRetrieverTest method testEntitySeedQueryEntitiesOnly.

/**
 * Calls {@code setupGraph(store, numEntries)}, then queries with one {@link EntitySeed} per entry with
 * entities included and edges excluded, and asserts that the retriever yields {@code numEntries} elements.
 *
 * @param store the Accumulo store to set up and query
 * @throws AccumuloException declared by this method; raised by the underlying Accumulo calls
 * @throws StoreException    if a store operation fails
 */
private void testEntitySeedQueryEntitiesOnly(final AccumuloStore store) throws AccumuloException, StoreException {
    setupGraph(store, numEntries);
    final User user = new User();
    // Create set to query for
    final Set<ElementSeed> ids = new HashSet<>();
    for (int i = 0; i < numEntries; i++) {
        ids.add(new EntitySeed("" + i));
    }
    final View view = new View.Builder().edge(TestGroups.EDGE).entity(TestGroups.ENTITY).build();
    final GetElements<ElementSeed, ?> operation = new GetElements<>(view, ids);
    operation.setIncludeEntities(true);
    operation.setIncludeEdges(IncludeEdgeType.NONE);
    final AccumuloSingleIDRetriever retriever;
    try {
        retriever = new AccumuloSingleIDRetriever(store, operation, user);
    } catch (IteratorSettingException e) {
        // Fail here with the real cause rather than printing the stack trace and hitting a
        // NullPointerException on the unassigned retriever below.
        throw new AssertionError("Failed to create AccumuloSingleIDRetriever", e);
    }
    // Should find only the entities
    assertEquals(numEntries, Iterables.size(retriever));
}
Also used : User(uk.gov.gchq.gaffer.user.User) EntitySeed(uk.gov.gchq.gaffer.operation.data.EntitySeed) ElementSeed(uk.gov.gchq.gaffer.operation.data.ElementSeed) GetElements(uk.gov.gchq.gaffer.operation.impl.get.GetElements) IteratorSettingException(uk.gov.gchq.gaffer.accumulostore.key.exception.IteratorSettingException) View(uk.gov.gchq.gaffer.data.elementdefinition.view.View) HashSet(java.util.HashSet)

Example 25 with GetElements

use of uk.gov.gchq.gaffer.operation.impl.get.GetElements in project Gaffer by gchq.

the class FilteringIT method testPostAggregationFilteringProperties.

/**
 * Executes the same seeded GetElements query twice - once without a view and once with a view carrying
 * post-aggregation filters on the entity and edge groups - and compares the two result lists.
 *
 * @throws OperationException if executing either operation on the graph fails
 */
@Test
@TraitRequirement({ StoreTrait.POST_AGGREGATION_FILTERING, StoreTrait.STORE_AGGREGATION })
public void testPostAggregationFilteringProperties() throws OperationException {
    // Given
    final List<ElementSeed> seeds = Arrays.asList(new EntitySeed("A3"), new EdgeSeed("A5", "B5", false));
    // Entities are filtered on vertex == "A5"; edges are filtered on the INT property being less than 2.
    final View postAggregationView = new View.Builder()
            .entity(TestGroups.ENTITY, new ViewElementDefinition.Builder()
                    .postAggregationFilter(new ElementFilter.Builder()
                            .select(IdentifierType.VERTEX.name())
                            .execute(new IsEqual("A5"))
                            .build())
                    .build())
            .edge(TestGroups.EDGE, new ViewElementDefinition.Builder()
                    .postAggregationFilter(new ElementFilter.Builder()
                            .select(TestPropertyNames.INT)
                            .execute(new IsLessThan(2))
                            .build())
                    .build())
            .build();
    final GetElements<ElementSeed, Element> unfilteredOp = new GetElements.Builder<>().seeds(seeds).build();
    final GetElements<ElementSeed, Element> filteredOp = new GetElements.Builder<>().seeds(seeds).view(postAggregationView).build();

    // When - without filtering
    final List<Element> unfiltered = Lists.newArrayList(graph.execute(unfilteredOp, getUser()));
    // When - with filtering
    final List<Element> filtered = Lists.newArrayList(graph.execute(filteredOp, getUser()));

    // Then - without filtering
    assertNotNull(unfiltered);
    assertEquals(8, unfiltered.size());
    assertThat(unfiltered, IsCollectionContaining.hasItems(getEdge("A3", "A3", false), getEdge("A3", "B3", false), getEdge("A3", "C3", false), getEdge("A3", "D3", false), getEdge("A5", "B5", false), getEntity("A5"), getEntity("B5")));

    // Then - with filtering
    assertNotNull(filtered);
    assertEquals(6, filtered.size());
    assertThat(filtered, IsCollectionContaining.hasItems(getEdge("A3", "A3", false), getEdge("A3", "B3", false), getEdge("A5", "B5", false), getEdge("A3", "D3", false), getEdge("A3", "C3", false), getEntity("A5")));
}
Also used : Element(uk.gov.gchq.gaffer.data.element.Element) GetElements(uk.gov.gchq.gaffer.operation.impl.get.GetElements) ViewElementDefinition(uk.gov.gchq.gaffer.data.elementdefinition.view.ViewElementDefinition) View(uk.gov.gchq.gaffer.data.elementdefinition.view.View) IsEqual(uk.gov.gchq.gaffer.function.filter.IsEqual) IsLessThan(uk.gov.gchq.gaffer.function.filter.IsLessThan) EdgeSeed(uk.gov.gchq.gaffer.operation.data.EdgeSeed) ElementFilter(uk.gov.gchq.gaffer.data.element.function.ElementFilter) EntitySeed(uk.gov.gchq.gaffer.operation.data.EntitySeed) ElementSeed(uk.gov.gchq.gaffer.operation.data.ElementSeed) Test(org.junit.Test) TraitRequirement(uk.gov.gchq.gaffer.integration.TraitRequirement)

Aggregations

GetElements (uk.gov.gchq.gaffer.operation.impl.get.GetElements)28 Element (uk.gov.gchq.gaffer.data.element.Element)19 EntitySeed (uk.gov.gchq.gaffer.operation.data.EntitySeed)19 User (uk.gov.gchq.gaffer.user.User)16 HashSet (java.util.HashSet)15 View (uk.gov.gchq.gaffer.data.elementdefinition.view.View)14 Test (org.junit.Test)12 ElementSeed (uk.gov.gchq.gaffer.operation.data.ElementSeed)12 AddElements (uk.gov.gchq.gaffer.operation.impl.add.AddElements)8 IteratorSettingException (uk.gov.gchq.gaffer.accumulostore.key.exception.IteratorSettingException)7 OperationChain (uk.gov.gchq.gaffer.operation.OperationChain)5 TraitRequirement (uk.gov.gchq.gaffer.integration.TraitRequirement)4 EdgeSeed (uk.gov.gchq.gaffer.operation.data.EdgeSeed)4 CloseableIterable (uk.gov.gchq.gaffer.commonutil.iterable.CloseableIterable)3 Edge (uk.gov.gchq.gaffer.data.element.Edge)3 Entity (uk.gov.gchq.gaffer.data.element.Entity)3 ElementFilter (uk.gov.gchq.gaffer.data.element.function.ElementFilter)3 GetAdjacentEntitySeeds (uk.gov.gchq.gaffer.operation.impl.get.GetAdjacentEntitySeeds)3 ByteSequence (org.apache.accumulo.core.data.ByteSequence)2 Range (org.apache.accumulo.core.data.Range)2