use of uk.gov.gchq.gaffer.data.element.Element in project Gaffer by gchq.
the class AccumuloIDBetweenSetsRetrieverTest method shouldDealWithFalsePositives.
/**
 * Checks that an element reachable only through a Bloom filter false positive is not returned
 * by a {@code GetElementsBetweenSets} query.
 *
 * <p>The test rebuilds a Bloom filter with the same size and number of hashes the retriever is
 * expected to use, searches for a value that the filter wrongly reports as present, stores an
 * edge from A0 to that value, and then asserts the query between {A0} and the seed set still
 * returns only the two expected elements.
 *
 * @param loadIntoMemory forwarded to {@code returnElementsFromOperation}; also decides whether
 *                       the filter is sized from the seed count or from a batch size of 20
 * @param store          the store that is written to and queried
 * @throws StoreException                     declared for the store helpers invoked here
 * @throws AccumuloElementConversionException declared for the vertex serialisation
 */
private void shouldDealWithFalsePositives(final boolean loadIntoMemory, final AccumuloStore store) throws StoreException, AccumuloElementConversionException {
    final Set<EntitySeed> seeds = new HashSet<>();
    seeds.add(AccumuloTestData.SEED_A0);
    seeds.add(AccumuloTestData.SEED_A23);
    // Pad the seed set with extra vertices so that the probability of being able to find a
    // false positive is sensible.
    for (int i = 0; i < 10; i++) {
        seeds.add(new EntitySeed("abc" + i));
    }
    // Need to make sure that the Bloom filter we create has the same size and the same number of hashes as the
    // one that GraphElementsWithStatisticsWithinSetRetriever creates.
    final int numItemsToBeAdded = loadIntoMemory ? seeds.size() : 20;
    if (!loadIntoMemory) {
        store.getProperties().setMaxEntriesForBatchScanner("20");
    }
    // Find something that will give a false positive
    // Need to repeat the logic used in the getGraphElementsWithStatisticsWithinSet() method.
    // Calculate sensible size of filter, aiming for false positive rate of 1 in 10000, with a maximum size of
    // maxBloomFilterToPassToAnIterator bytes.
    int size = (int) (-numItemsToBeAdded * Math.log(0.0001) / (Math.pow(Math.log(2.0), 2.0)));
    size = Math.min(size, store.getProperties().getMaxBloomFilterToPassToAnIterator());
    // Work out optimal number of hashes to use in Bloom filter based on size of set - optimal number of hashes is
    // (m/n)ln 2 where m is the size of the filter in bits and n is the number of items that will be added to the set.
    // NOTE: the integer division is intentional here - it must mirror the retriever's own calculation.
    final int numHashes = Math.max(1, (int) ((size / numItemsToBeAdded) * Math.log(2)));
    // Create Bloom filter and add seeds to it
    final BloomFilter filter = new BloomFilter(size, numHashes, Hash.MURMUR_HASH);
    for (final EntitySeed seed : seeds) {
        filter.add(new Key(store.getKeyPackage().getKeyConverter().serialiseVertex(seed.getVertex())));
    }
    // Test successive integers (as strings) against the filter until one is reported as present.
    // Impose an arbitrary limit on the number of tries to avoid an infinite loop if there's a problem.
    final int maxNumberOfTries = 50 * store.getProperties().getMaxBloomFilterToPassToAnIterator();
    int count = 0;
    // Tracked explicitly so that a hit on the very last permitted try is not mistaken for a miss.
    boolean foundFalsePositive = false;
    while (count < maxNumberOfTries) {
        count++;
        // Explicit charset so the candidate bytes do not depend on the platform default.
        if (filter.membershipTest(new Key(("" + count).getBytes(java.nio.charset.StandardCharsets.UTF_8)))) {
            foundFalsePositive = true;
            break;
        }
    }
    if (!foundFalsePositive) {
        fail("Didn't find a false positive");
    }
    // False positive is "" + count so create an edge from seeds to that
    final Edge edge = new Edge(TestGroups.EDGE, "A0", "" + count, true);
    edge.putProperty(AccumuloPropertyNames.COUNT, 1000000);
    final Set<Element> data = new HashSet<>();
    data.add(edge);
    final User user = new User();
    addElements(data, store, user);
    // Now query for all edges in set - shouldn't get the false positive
    final AbstractAccumuloTwoSetSeededOperation<EntitySeed, Element> op = new GetElementsBetweenSets<>(AccumuloTestData.SEED_A0_SET, seeds, defaultView);
    final Set<Element> results = returnElementsFromOperation(store, op, user, loadIntoMemory);
    // Check results are as expected
    assertEquals(2, results.size());
    assertThat(results, IsCollectionContaining.hasItems(AccumuloTestData.EDGE_A0_A23, AccumuloTestData.A0_ENTITY));
}
use of uk.gov.gchq.gaffer.data.element.Element in project Gaffer by gchq.
the class AccumuloIDBetweenSetsRetrieverTest method testEdgesWithinSetAAreNotReturned.
/**
 * Queries between the set {A0, A23} and the B seed set and asserts that exactly two elements
 * come back, including the A0 and A23 entities.
 *
 * @param loadIntoMemory forwarded to {@code returnElementsFromOperation}
 * @param store          the store to query
 * @throws StoreException declared for the query helper invoked here
 */
private void testEdgesWithinSetAAreNotReturned(final boolean loadIntoMemory, final AccumuloStore store) throws StoreException {
    final AbstractAccumuloTwoSetSeededOperation<EntitySeed, Element> betweenSetsOperation =
            new GetElementsBetweenSets<>(AccumuloTestData.SEED_A0_A23_SET, AccumuloTestData.SEED_B_SET, defaultView);
    final User queryUser = new User();
    final Set<Element> elementsFound = returnElementsFromOperation(store, betweenSetsOperation, queryUser, loadIntoMemory);

    // Only the entities for A0 and A23 are expected; the A0-A23 edge lies within the first set
    // and so must not be present.
    assertEquals(2, elementsFound.size());
    assertThat(elementsFound, IsCollectionContaining.hasItems(AccumuloTestData.A0_ENTITY, AccumuloTestData.A23_ENTITY));
}
use of uk.gov.gchq.gaffer.data.element.Element in project Gaffer by gchq.
the class AccumuloIDBetweenSetsRetrieverTest method shouldLoadElementsWhenMoreElementsThanFitInBatchScanner.
/**
 * Limits the batch scanner to a single entry and then runs three between-sets queries,
 * checking the result size and contents of each.
 *
 * @param loadIntoMemory forwarded to {@code returnElementsFromOperation}
 * @param store          the store to configure and query
 * @throws StoreException declared for the query helper invoked here
 */
private void shouldLoadElementsWhenMoreElementsThanFitInBatchScanner(final boolean loadIntoMemory, final AccumuloStore store) throws StoreException {
    // Restrict the batch scanner to one entry at a time.
    store.getProperties().setMaxEntriesForBatchScanner("1");

    // {A0} against {A23}: expect the connecting edge plus the A0 entity.
    final AbstractAccumuloTwoSetSeededOperation<EntitySeed, Element> a0ToA23 =
            new GetElementsBetweenSets<>(AccumuloTestData.SEED_A0_SET, AccumuloTestData.SEED_A23_SET, defaultView);
    final Set<Element> a0ToA23Elements = returnElementsFromOperation(store, a0ToA23, new User(), loadIntoMemory);
    assertEquals(2, a0ToA23Elements.size());
    assertThat(a0ToA23Elements, IsCollectionContaining.hasItems(AccumuloTestData.EDGE_A0_A23, AccumuloTestData.A0_ENTITY));

    // {A1} against {notpresent}: no edges are expected, only the A1 entity.
    final AbstractAccumuloTwoSetSeededOperation<EntitySeed, Element> a1ToMissing =
            new GetElementsBetweenSets<>(AccumuloTestData.SEED_A1_SET, AccumuloTestData.NOT_PRESENT_ENTITY_SEED_SET, defaultView);
    final Set<Element> a1ToMissingElements = returnElementsFromOperation(store, a1ToMissing, new User(), loadIntoMemory);
    assertEquals(1, a1ToMissingElements.size());
    assertThat(a1ToMissingElements, IsCollectionContaining.hasItem(AccumuloTestData.A1_ENTITY));

    // {A1} against {A2}: again no edges are expected, only the A1 entity.
    final AbstractAccumuloTwoSetSeededOperation<EntitySeed, Element> a1ToA2 =
            new GetElementsBetweenSets<>(AccumuloTestData.SEED_A1_SET, AccumuloTestData.SEED_A2_SET, defaultView);
    final Set<Element> a1ToA2Elements = returnElementsFromOperation(store, a1ToA2, new User(), loadIntoMemory);
    assertEquals(1, a1ToA2Elements.size());
    assertThat(a1ToA2Elements, IsCollectionContaining.hasItem(AccumuloTestData.A1_ENTITY));
}
use of uk.gov.gchq.gaffer.data.element.Element in project Gaffer by gchq.
the class AccumuloIDBetweenSetsRetrieverTest method shouldDealWithOutgoingEdgesOnlyOption.
/**
 * Checks that restricting a between-sets query to outgoing-only or incoming-only edges
 * returns the expected edges.
 *
 * <p>Two directed edges (A1 to B1 and B2 to A2) are added, and each pair of seed sets is then
 * queried in both directions with entities excluded.
 *
 * @param store the store that is written to and queried
 */
private void shouldDealWithOutgoingEdgesOnlyOption(final AccumuloStore store) {
    try {
        // Load the two directed edges under test into the store.
        final Set<Element> edgesToAdd = new HashSet<>();
        edgesToAdd.add(AccumuloTestData.EDGE_A1_B1);
        edgesToAdd.add(AccumuloTestData.EDGE_B2_A2);
        addElements(edgesToAdd, store, new User());

        // {A1} vs {B1}, outgoing only: the edge A1>B1 should be returned.
        final AbstractAccumuloTwoSetSeededOperation<EntitySeed, Element> a1ToB1Query =
                new GetElementsBetweenSets<>(AccumuloTestData.SEED_A1_SET, AccumuloTestData.SEED_B1_SET, defaultView);
        a1ToB1Query.setIncludeEntities(false);
        a1ToB1Query.setIncludeIncomingOutGoing(IncludeIncomingOutgoingType.OUTGOING);
        final Set<Element> a1ToB1Outgoing = returnElementsFromOperation(store, a1ToB1Query, new User(), false);
        assertThat(a1ToB1Outgoing, IsCollectionContaining.hasItem(AccumuloTestData.EDGE_A1_B1));

        // Same seeds, incoming only: nothing should be returned.
        a1ToB1Query.setIncludeIncomingOutGoing(IncludeIncomingOutgoingType.INCOMING);
        final Set<Element> a1ToB1Incoming = returnElementsFromOperation(store, a1ToB1Query, new User(), false);
        assertEquals(0, a1ToB1Incoming.size());

        // {A2} vs {B2}, incoming only: the edge B2->A2 should be returned.
        final AbstractAccumuloTwoSetSeededOperation<EntitySeed, Element> a2ToB2Query =
                new GetElementsBetweenSets<>(AccumuloTestData.SEED_A2_SET, AccumuloTestData.SEED_B2_SET, defaultView);
        a2ToB2Query.setIncludeEntities(false);
        a2ToB2Query.setIncludeIncomingOutGoing(IncludeIncomingOutgoingType.INCOMING);
        final Set<Element> a2ToB2Incoming = returnElementsFromOperation(store, a2ToB2Query, new User(), false);
        assertThat(a2ToB2Incoming, IsCollectionContaining.hasItem(AccumuloTestData.EDGE_B2_A2));

        // Same seeds, outgoing only: nothing should be returned.
        a2ToB2Query.setIncludeIncomingOutGoing(IncludeIncomingOutgoingType.OUTGOING);
        final Set<Element> a2ToB2Outgoing = returnElementsFromOperation(store, a2ToB2Query, new User(), false);
        assertEquals(0, a2ToB2Outgoing.size());
    } catch (StoreException storeFailure) {
        fail("Failed to set up graph in Accumulo with exception: " + storeFailure);
    }
}
use of uk.gov.gchq.gaffer.data.element.Element in project Gaffer by gchq.
the class AccumuloSingleIDRetrieverTest method testEntitySeedQueryOutgoingEdgesOnly.
/**
 * Sets up the graph, queries every entity seed {@code "0"} to {@code numEntries - 1} with
 * entities excluded and only outgoing edges included, and asserts that every returned element
 * is in the edge group and that two edges are found per seed.
 *
 * @param store the store to populate and query
 * @throws AccumuloException declared for the graph set-up helper
 * @throws StoreException    declared for the retriever and set-up helper
 */
private void testEntitySeedQueryOutgoingEdgesOnly(final AccumuloStore store) throws AccumuloException, StoreException {
    setupGraph(store, numEntries);
    final User user = new User();
    // Create set to query for
    final Set<ElementSeed> ids = new HashSet<>();
    for (int i = 0; i < numEntries; i++) {
        ids.add(new EntitySeed("" + i));
    }
    final View view = new View.Builder().edge(TestGroups.EDGE).entity(TestGroups.ENTITY).build();
    final GetElements<ElementSeed, ?> operation = new GetElements<>(view, ids);
    operation.setIncludeEntities(false);
    operation.setIncludeIncomingOutGoing(IncludeIncomingOutgoingType.OUTGOING);
    final AccumuloSingleIDRetriever retriever;
    try {
        retriever = new AccumuloSingleIDRetriever(store, operation, user);
    } catch (final IteratorSettingException e) {
        // Fail here with the real cause rather than continuing with a null retriever and
        // hitting an uninformative NullPointerException in the loop below.
        throw new AssertionError("Failed to create AccumuloSingleIDRetriever: " + e.getMessage(), e);
    }
    int count = 0;
    for (final Element element : retriever) {
        count++;
        assertEquals(TestGroups.EDGE, element.getGroup());
    }
    //Should find both i-B and i-C edges.
    assertEquals(numEntries * 2, count);
}
Aggregations