Search in sources :

Example 86 with EntitySeed

use of uk.gov.gchq.gaffer.operation.data.EntitySeed in project Gaffer by gchq.

the class FilterToOperationConverterTest method testSpecifySourceOrDestinationAndPropertyFilter.

/**
 * Builds operations from Spark filters that fix the source column to "0" and
 * constrain one or two properties, then asserts the operation type, the groups
 * left in the view, the seeds, and the post-aggregation predicates.
 */
@Test
public void testSpecifySourceOrDestinationAndPropertyFilter() {
    final Schema schema = getSchema();
    // Obtained up front exactly as before; the assertions below never read the session itself.
    final SparkSession sparkSession = SparkSessionProvider.getSparkSession();

    // Scenario 1: source column equal to "0" together with property1 > 5
    final Filter[] srcAndProperty1 = {
        new GreaterThan("property1", 5),
        new EqualTo(SchemaToStructTypeConverter.SRC_COL_NAME, "0")
    };
    final Operation firstOperation = new FiltersToOperationConverter(getViewFromSchema(schema), schema, srcAndProperty1).getOperation();
    assertTrue(firstOperation instanceof GetRDDOfElements);
    final View firstView = ((GraphFilters) firstOperation).getView();
    assertEquals(0, firstView.getEntityGroups().size());
    assertEquals(2, firstView.getEdgeGroups().size());
    final Set<EntityId> firstSeeds = new HashSet<>();
    for (final Object input : ((GetRDDOfElements) firstOperation).getInput()) {
        firstSeeds.add((EntitySeed) input);
    }
    assertEquals(Collections.singleton(new EntitySeed("0")), firstSeeds);
    for (final String group : EDGE_GROUPS) {
        final List<TupleAdaptedPredicate<String, ?>> groupFilters = firstView.getEdge(group).getPostAggregationFilterFunctions();
        assertThat(groupFilters).hasSize(1);
        final TupleAdaptedPredicate<String, ?> onlyFilter = groupFilters.get(0);
        assertArrayEquals(new String[] { "property1" }, onlyFilter.getSelection());
        assertEquals(new IsMoreThan(5, false), onlyFilter.getPredicate());
    }

    // Scenario 2: same source constraint, with filters on both property1 and property4
    final Filter[] srcAndTwoProperties = {
        new GreaterThan("property1", 5),
        new EqualTo(SchemaToStructTypeConverter.SRC_COL_NAME, "0"),
        new LessThan("property4", 8)
    };
    final Operation secondOperation = new FiltersToOperationConverter(getViewFromSchema(schema), schema, srcAndTwoProperties).getOperation();
    assertTrue(secondOperation instanceof GetRDDOfElements);
    final View secondView = ((GraphFilters) secondOperation).getView();
    assertEquals(0, secondView.getEntityGroups().size());
    assertEquals(1, secondView.getEdgeGroups().size());
    final Set<EntityId> secondSeeds = new HashSet<>();
    for (final Object input : ((GetRDDOfElements) secondOperation).getInput()) {
        secondSeeds.add((EntitySeed) input);
    }
    assertEquals(Collections.singleton(new EntitySeed("0")), secondSeeds);

    // Only EDGE_GROUP is inspected here; it must carry both predicates, in filter order
    final List<TupleAdaptedPredicate<String, ?>> edgeFilters = secondView.getEdge(EDGE_GROUP).getPostAggregationFilterFunctions();
    assertThat(edgeFilters).hasSize(2);
    final TupleAdaptedPredicate<String, ?> moreThanFilter = edgeFilters.get(0);
    assertThat(moreThanFilter.getSelection()).hasSize(1);
    assertEquals("property1", moreThanFilter.getSelection()[0]);
    assertEquals(new IsMoreThan(5, false), moreThanFilter.getPredicate());
    final TupleAdaptedPredicate<String, ?> lessThanFilter = edgeFilters.get(1);
    assertThat(lessThanFilter.getSelection()).hasSize(1);
    assertEquals("property4", lessThanFilter.getSelection()[0]);
    assertEquals(new IsLessThan(8, false), lessThanFilter.getPredicate());
}
Also used : TupleAdaptedPredicate(uk.gov.gchq.koryphe.tuple.predicate.TupleAdaptedPredicate) SparkSession(org.apache.spark.sql.SparkSession) Schema(uk.gov.gchq.gaffer.store.schema.Schema) ArrayList(java.util.ArrayList) GraphFilters(uk.gov.gchq.gaffer.operation.graph.GraphFilters) Operation(uk.gov.gchq.gaffer.operation.Operation) View(uk.gov.gchq.gaffer.data.elementdefinition.view.View) EqualTo(org.apache.spark.sql.sources.EqualTo) EntityId(uk.gov.gchq.gaffer.data.element.id.EntityId) IsLessThan(uk.gov.gchq.koryphe.impl.predicate.IsLessThan) LessThan(org.apache.spark.sql.sources.LessThan) IsLessThan(uk.gov.gchq.koryphe.impl.predicate.IsLessThan) Filter(org.apache.spark.sql.sources.Filter) GreaterThan(org.apache.spark.sql.sources.GreaterThan) EntitySeed(uk.gov.gchq.gaffer.operation.data.EntitySeed) IsMoreThan(uk.gov.gchq.koryphe.impl.predicate.IsMoreThan) GetRDDOfElements(uk.gov.gchq.gaffer.spark.operation.scalardd.GetRDDOfElements) HashSet(java.util.HashSet) Test(org.junit.jupiter.api.Test)

Example 87 with EntitySeed

use of uk.gov.gchq.gaffer.operation.data.EntitySeed in project Gaffer by gchq.

the class QueryGeneratorTest method testQueryGeneratorForGetElementsWithEntitySeeds.

/**
 * Checks the set of {@code ParquetFileQuery} objects that {@code QueryGenerator}
 * produces for {@code GetElements} operations seeded with {@code EntitySeed}s.
 * Four scenarios are run against the same store: one seed with no view, two
 * seeds with no view, two seeds with an {@code IsMoreThan} pre-aggregation
 * filter, and two seeds with an {@code IsEvenFilter} pre-aggregation filter.
 *
 * @param tempDir temporary directory supplied by JUnit; used as the store's data directory
 */
@Test
public void testQueryGeneratorForGetElementsWithEntitySeeds(@TempDir java.nio.file.Path tempDir) throws IOException, OperationException {
    // Given
    // - Create snapshot folder
    final String folder = String.format("file:///%s", tempDir.toString());
    final String snapshotFolder = folder + "/" + ParquetStore.getSnapshotPath(1000L);
    // - Write out Parquet files so that the partitioning is known
    CalculatePartitionerTest.writeData(snapshotFolder, new SchemaUtils(schema));
    // - Initialise store
    final ParquetStoreProperties storeProperties = new ParquetStoreProperties();
    storeProperties.setDataDir(folder);
    storeProperties.setTempFilesDir(folder + "/tmpdata");
    final ParquetStore store = (ParquetStore) ParquetStore.createStore("graphId", schema, storeProperties);
    // When 1 - no view, query for vertex 0
    GetElements getElements = new GetElements.Builder().input(new EntitySeed(0L)).seedMatching(SeedMatching.SeedMatchingType.RELATED).build();
    ParquetQuery query = new QueryGenerator(store).getParquetQuery(getElements);
    // Then 1
    // NOTE(review): the raw List appears to be what lets containsOnly(Object[]) compile below - confirm before adding a type argument
    final List expected = new ArrayList<>();
    final FilterPredicate vertex0 = eq(FilterApi.longColumn(ParquetStore.VERTEX), 0L);
    final FilterPredicate source0 = eq(FilterApi.longColumn(ParquetStore.SOURCE), 0L);
    final FilterPredicate destination0 = eq(FilterApi.longColumn(ParquetStore.DESTINATION), 0L);
    // Entity groups: one query per group against file 0, filtering on the vertex column
    for (final String group : Arrays.asList(TestGroups.ENTITY, TestGroups.ENTITY_2)) {
        final Path groupFolderPath = new Path(snapshotFolder, ParquetStore.getGroupSubDir(group, false));
        final Path pathForPartitionFile = new Path(groupFolderPath, ParquetStore.getFile(0));
        expected.add(new ParquetFileQuery(pathForPartitionFile, vertex0, true));
    }
    // Edge groups: file 0 of the normal folder filtered by source, file 0 of the reversed folder filtered by destination
    for (final String group : Arrays.asList(TestGroups.EDGE, TestGroups.EDGE_2)) {
        final Path groupFolderPath = new Path(snapshotFolder, ParquetStore.getGroupSubDir(group, false));
        final Path pathForPartitionFile = new Path(groupFolderPath, ParquetStore.getFile(0));
        expected.add(new ParquetFileQuery(pathForPartitionFile, source0, true));
        final Path reversedGroupFolderPath = new Path(snapshotFolder, ParquetStore.getGroupSubDir(group, true));
        final Path pathForReversedPartitionFile = new Path(reversedGroupFolderPath, ParquetStore.getFile(0));
        expected.add(new ParquetFileQuery(pathForReversedPartitionFile, destination0, true));
    }
    assertThat(expected).containsOnly(query.getAllParquetFileQueries().toArray());
    // When 2 - no view, query for vertices 0 and 1000000
    getElements = new GetElements.Builder().input(new EntitySeed(0L), new EntitySeed(1000000L)).seedMatching(SeedMatching.SeedMatchingType.RELATED).build();
    query = new QueryGenerator(store).getParquetQuery(getElements);
    // Then 2
    expected.clear();
    final FilterPredicate vertex1000000 = eq(FilterApi.longColumn(ParquetStore.VERTEX), 1000000L);
    final FilterPredicate source1000000 = eq(FilterApi.longColumn(ParquetStore.SOURCE), 1000000L);
    final FilterPredicate destination1000000 = eq(FilterApi.longColumn(ParquetStore.DESTINATION), 1000000L);
    // Entity groups: vertex 0 is expected in file 0 and vertex 1000000 in file 9
    for (final String group : Arrays.asList(TestGroups.ENTITY, TestGroups.ENTITY_2)) {
        final Path groupFolderPath = new Path(snapshotFolder, ParquetStore.getGroupSubDir(group, false));
        final Path pathForPartitionFile1 = new Path(groupFolderPath, ParquetStore.getFile(0));
        expected.add(new ParquetFileQuery(pathForPartitionFile1, vertex0, true));
        final Path pathForPartitionFile2 = new Path(groupFolderPath, ParquetStore.getFile(9));
        expected.add(new ParquetFileQuery(pathForPartitionFile2, vertex1000000, true));
    }
    for (final String group : Arrays.asList(TestGroups.EDGE, TestGroups.EDGE_2)) {
        final Path groupFolderPath = new Path(snapshotFolder, ParquetStore.getGroupSubDir(group, false));
        final Path reversedGroupFolderPath = new Path(snapshotFolder, ParquetStore.getGroupSubDir(group, true));
        // Partition 0, vertex 0L
        final Path pathForPartitionFile1 = new Path(groupFolderPath, ParquetStore.getFile(0));
        expected.add(new ParquetFileQuery(pathForPartitionFile1, source0, true));
        // Partition 9, vertex 1000000L
        final Path pathForPartitionFile2 = new Path(groupFolderPath, ParquetStore.getFile(9));
        expected.add(new ParquetFileQuery(pathForPartitionFile2, source1000000, true));
        // Partition 0 of reversed, vertex 0L
        final Path pathForPartitionFile3 = new Path(reversedGroupFolderPath, ParquetStore.getFile(0));
        expected.add(new ParquetFileQuery(pathForPartitionFile3, destination0, true));
        // Partition 9 of reversed, vertex 1000000L
        final Path pathForPartitionFile4 = new Path(reversedGroupFolderPath, ParquetStore.getFile(9));
        expected.add(new ParquetFileQuery(pathForPartitionFile4, destination1000000, true));
    }
    assertThat(expected).containsOnly(query.getAllParquetFileQueries().toArray());
    // When 3 - view with filter that can be pushed down to Parquet, query for vertices 0 and 1000000
    getElements = new GetElements.Builder().input(new EntitySeed(0L), new EntitySeed(1000000L)).seedMatching(SeedMatching.SeedMatchingType.RELATED).view(new View.Builder().edge(TestGroups.EDGE, new ViewElementDefinition.Builder().preAggregationFilter(new ElementFilter.Builder().select("count").execute(new IsMoreThan(10)).build()).build()).build()).build();
    query = new QueryGenerator(store).getParquetQuery(getElements);
    // Then 3
    expected.clear();
    // The view names only TestGroups.EDGE, so only that group's files are expected; each predicate combines count > 10 with the seed match
    final FilterPredicate source0AndCount = and(gt(FilterApi.intColumn("count"), 10), eq(FilterApi.longColumn(ParquetStore.SOURCE), 0L));
    final FilterPredicate source1000000AndCount = and(gt(FilterApi.intColumn("count"), 10), eq(FilterApi.longColumn(ParquetStore.SOURCE), 1000000L));
    final FilterPredicate destination0AndCount = and(gt(FilterApi.intColumn("count"), 10), eq(FilterApi.longColumn(ParquetStore.DESTINATION), 0L));
    final FilterPredicate destination1000000AndCount = and(gt(FilterApi.intColumn("count"), 10), eq(FilterApi.longColumn(ParquetStore.DESTINATION), 1000000L));
    final Path groupFolderPath = new Path(snapshotFolder, ParquetStore.getGroupSubDir(TestGroups.EDGE, false));
    final Path reversedGroupFolderPath = new Path(snapshotFolder, ParquetStore.getGroupSubDir(TestGroups.EDGE, true));
    // Partition 0, vertex 0L
    final Path pathForPartitionFile1 = new Path(groupFolderPath, ParquetStore.getFile(0));
    expected.add(new ParquetFileQuery(pathForPartitionFile1, source0AndCount, true));
    // Partition 9, vertex 1000000L
    final Path pathForPartitionFile2 = new Path(groupFolderPath, ParquetStore.getFile(9));
    expected.add(new ParquetFileQuery(pathForPartitionFile2, source1000000AndCount, true));
    // Partition 0 of reversed, vertex 0L
    final Path pathForPartitionFile3 = new Path(reversedGroupFolderPath, ParquetStore.getFile(0));
    expected.add(new ParquetFileQuery(pathForPartitionFile3, destination0AndCount, true));
    // Partition 9 of reversed, vertex 1000000L
    final Path pathForPartitionFile4 = new Path(reversedGroupFolderPath, ParquetStore.getFile(9));
    expected.add(new ParquetFileQuery(pathForPartitionFile4, destination1000000AndCount, true));
    assertThat(expected).containsOnly(query.getAllParquetFileQueries().toArray());
    // When 4 - view with filter that can't be pushed down to Parquet, query for vertices 0 and 1000000
    getElements = new GetElements.Builder().input(new EntitySeed(0L), new EntitySeed(1000000L)).seedMatching(SeedMatching.SeedMatchingType.RELATED).view(new View.Builder().edge(TestGroups.EDGE, new ViewElementDefinition.Builder().preAggregationFilter(new ElementFilter.Builder().select("count").execute(new IsEvenFilter()).build()).build()).build()).build();
    query = new QueryGenerator(store).getParquetQuery(getElements);
    // Then 4
    expected.clear();
    // Reuses the scenario-3 paths with the seed-only predicates; the last argument is now false
    // (presumably marking that the file filter does not fully apply the view - confirm against ParquetFileQuery)
    // Partition 0, vertex 0L
    expected.add(new ParquetFileQuery(pathForPartitionFile1, source0, false));
    // Partition 9, vertex 1000000L
    expected.add(new ParquetFileQuery(pathForPartitionFile2, source1000000, false));
    // Partition 0 of reversed, vertex 0L
    expected.add(new ParquetFileQuery(pathForPartitionFile3, destination0, false));
    // Partition 9 of reversed, vertex 1000000L
    expected.add(new ParquetFileQuery(pathForPartitionFile4, destination1000000, false));
    assertThat(expected).containsOnly(query.getAllParquetFileQueries().toArray());
}
Also used : ParquetStore(uk.gov.gchq.gaffer.parquetstore.ParquetStore) Path(org.apache.hadoop.fs.Path) ArrayList(java.util.ArrayList) GetElements(uk.gov.gchq.gaffer.operation.impl.get.GetElements) ViewElementDefinition(uk.gov.gchq.gaffer.data.elementdefinition.view.ViewElementDefinition) View(uk.gov.gchq.gaffer.data.elementdefinition.view.View) SchemaUtils(uk.gov.gchq.gaffer.parquetstore.utils.SchemaUtils) ParquetStoreProperties(uk.gov.gchq.gaffer.parquetstore.ParquetStoreProperties) EntitySeed(uk.gov.gchq.gaffer.operation.data.EntitySeed) ArrayList(java.util.ArrayList) List(java.util.List) FilterPredicate(org.apache.parquet.filter2.predicate.FilterPredicate) IsMoreThan(uk.gov.gchq.koryphe.impl.predicate.IsMoreThan) CalculatePartitionerTest(uk.gov.gchq.gaffer.parquetstore.operation.handler.utilities.CalculatePartitionerTest) LongVertexOperationsTest(uk.gov.gchq.gaffer.parquetstore.operation.handler.LongVertexOperationsTest) Test(org.junit.jupiter.api.Test)

Example 88 with EntitySeed

use of uk.gov.gchq.gaffer.operation.data.EntitySeed in project Gaffer by gchq.

the class LongVertexOperationsTest method getSeeds.

/**
 * Supplies the seeds used by this test class: entity seeds for vertices
 * 5, 10 and 15 followed by two edge seeds, returned in a new mutable list.
 *
 * @return the seeds, in the order listed above
 */
@Override
public List<ElementSeed> getSeeds() {
    final List<ElementSeed> result = new ArrayList<>();
    // Each primitive long is boxed to a Long vertex when passed to the constructor
    for (final long vertex : new long[] {5L, 10L, 15L}) {
        result.add(new EntitySeed(vertex));
    }
    result.add(new EdgeSeed(13L, 14L, true));
    result.add(new EdgeSeed(2L, 3L, true));
    return result;
}
Also used : EdgeSeed(uk.gov.gchq.gaffer.operation.data.EdgeSeed) ArrayList(java.util.ArrayList) EntitySeed(uk.gov.gchq.gaffer.operation.data.EntitySeed) ElementSeed(uk.gov.gchq.gaffer.operation.data.ElementSeed)

Example 89 with EntitySeed

use of uk.gov.gchq.gaffer.operation.data.EntitySeed in project Gaffer by gchq.

the class LongVertexOperationsTest method getSeedsThatWontAppear.

/**
 * Supplies entity seeds for the vertices -1, 300 and {@code Long.MAX_VALUE},
 * returned in a new mutable list.
 *
 * @return the three entity seeds, in the order listed above
 */
@Override
protected List<ElementSeed> getSeedsThatWontAppear() {
    final List<ElementSeed> result = new ArrayList<>();
    // Each primitive long is boxed to a Long vertex when passed to the constructor
    for (final long vertex : new long[] {-1L, 300L, Long.MAX_VALUE}) {
        result.add(new EntitySeed(vertex));
    }
    return result;
}
Also used : ArrayList(java.util.ArrayList) EntitySeed(uk.gov.gchq.gaffer.operation.data.EntitySeed) ElementSeed(uk.gov.gchq.gaffer.operation.data.ElementSeed)

Example 90 with EntitySeed

use of uk.gov.gchq.gaffer.operation.data.EntitySeed in project Gaffer by gchq.

the class FunctionAuthoriserTest method shouldMaintainOperationChainIfItFailsToSerialise.

/**
 * Verifies that {@code FunctionAuthoriser.preExecute} leaves the input of the
 * first operation in the chain equal to what was set before the call, when
 * that operation is a Mockito spy (which, per the original comment, fails
 * serialisation).
 */
@Test
public void shouldMaintainOperationChainIfItFailsToSerialise() {
    // Given
    final FunctionAuthoriser authoriser = new FunctionAuthoriser(Lists.newArrayList(Identity.class));
    // Typed rather than raw so the compiler checks what is handed to setInput
    final List<EntitySeed> fakeInput = Lists.newArrayList(new EntitySeed(1), new EntitySeed(2), new EntitySeed(3));
    final GetElements realOperation = new GetElements();
    realOperation.setInput(fakeInput);
    // will fail serialisation
    final GetElements getElements = spy(realOperation);
    final OperationChain<?> chain = new OperationChain.Builder().first(getElements).then(generateOperation(ToEntityId.class)).build();
    // When
    authoriser.preExecute(chain, new Context());
    // Then
    assertEquals(fakeInput, ((Input<?>) chain.getOperations().get(0)).getInput());
}
Also used : Context(uk.gov.gchq.gaffer.store.Context) OperationChain(uk.gov.gchq.gaffer.operation.OperationChain) EntitySeed(uk.gov.gchq.gaffer.operation.data.EntitySeed) GetElements(uk.gov.gchq.gaffer.operation.impl.get.GetElements) ArrayList(java.util.ArrayList) List(java.util.List) Identity(uk.gov.gchq.koryphe.impl.function.Identity) Test(org.junit.jupiter.api.Test)

Aggregations

EntitySeed (uk.gov.gchq.gaffer.operation.data.EntitySeed) 284, Test (org.junit.jupiter.api.Test) 122, GetElements (uk.gov.gchq.gaffer.operation.impl.get.GetElements) 122, Element (uk.gov.gchq.gaffer.data.element.Element) 102, User (uk.gov.gchq.gaffer.user.User) 92, AddElements (uk.gov.gchq.gaffer.operation.impl.add.AddElements) 90, View (uk.gov.gchq.gaffer.data.elementdefinition.view.View) 87, HashSet (java.util.HashSet) 71, Graph (uk.gov.gchq.gaffer.graph.Graph) 69, Entity (uk.gov.gchq.gaffer.data.element.Entity) 65, Edge (uk.gov.gchq.gaffer.data.element.Edge) 61, Test (org.junit.Test) 58, ArrayList (java.util.ArrayList) 55, ViewElementDefinition (uk.gov.gchq.gaffer.data.elementdefinition.view.ViewElementDefinition) 46, EntityId (uk.gov.gchq.gaffer.data.element.id.EntityId) 41, CloseableIterable (uk.gov.gchq.gaffer.commonutil.iterable.CloseableIterable) 40, OperationChain (uk.gov.gchq.gaffer.operation.OperationChain) 38, ElementFilter (uk.gov.gchq.gaffer.data.element.function.ElementFilter) 36, EdgeSeed (uk.gov.gchq.gaffer.operation.data.EdgeSeed) 36, OperationTest (uk.gov.gchq.gaffer.operation.OperationTest) 35