 * <p>
 * Within FederatedStore an {@link Operation} is executed against a
 * collection of many graphs.
 * </p>
 * <p>
 * Problem: When an Operation contains View information about an Element
 * which is not known by the Graph, it will fail validation when executed.
 * </p>
 * <p>
 * Solution: For each operation, remove all elements from the View that are
 * unknown to the graph. This method will also update AddElements operations
 * to allow elements to be added to various federated graphs with different
 * schemas at the same time without causing validation errors.
 * </p>
 * @param operation current operation
 * @param graph     current graph
 * @param <OP>      Operation type
 * @return cloned operation with modified View for the given graph.
public static <OP extends Operation> OP updateOperationForGraph(final OP operation, final Graph graph) {
    // NOTE(review): this listing appears to have lost its closing braces and possibly
    // some statements (nothing visible uses resultOperations after it is created);
    // compare against the upstream source before relying on the control flow shown here.
    // Work on a shallow clone rather than on the operation that was passed in.
    OP resultOp = (OP) operation.shallowClone();
    if (nonNull(resultOp.getOptions())) {
        // Copy the options into a new HashMap so the clone holds its own map instance.
        resultOp.setOptions(new HashMap<>(resultOp.getOptions()));
    if (resultOp instanceof Operations) {
        // Container of nested operations: update each nested operation for this graph recursively.
        final Operations<Operation> operations = (Operations) resultOp;
        final List<Operation> resultOperations = new ArrayList<>();
        for (final Operation nestedOp : operations.getOperations()) {
            final Operation updatedNestedOp = updateOperationForGraph(nestedOp, graph);
            if (null == updatedNestedOp) {
                // A nested operation that resolved to null makes the whole result null.
                resultOp = null;
    } else if (resultOp instanceof OperationView) {
        // Operation carrying a View: reduce the view against this graph's schema.
        final View view = ((OperationView) resultOp).getView();
        if (null != view && view.hasGroups()) {
            final View validView = createValidView(view, graph.getSchema());
            // Reference comparison; presumably createValidView returns the same
            // instance when nothing had to be removed — TODO confirm.
            if (view != validView) {
                // then clone the operation and add the new view.
                if (validView.hasGroups()) {
                    ((OperationView) resultOp).setView(validView);
                } else if (!graph.hasTrait(StoreTrait.DYNAMIC_SCHEMA)) {
                    // The view has no groups so the operation would return
                    // nothing, so we shouldn't execute the operation.
                    resultOp = null;
    } else if (resultOp instanceof AddElements) {
        final AddElements addElements = ((AddElements) resultOp);
        if (null == addElements.getInput()) {
            // No input set: unless both flags are already on, switch on validation
            // and skipping of invalid elements on a fresh clone.
            if (!addElements.isValidate() || !addElements.isSkipInvalidElements()) {
                LOGGER.debug("Invalid elements will be skipped when added to {}", graph.getGraphId());
                resultOp = (OP) addElements.shallowClone();
                ((AddElements) resultOp).setValidate(true);
                ((AddElements) resultOp).setSkipInvalidElements(true);
        } else {
            // Input is present: keep only elements whose group is in this graph's schema.
            // A null element is looked up as group null.
            resultOp = (OP) addElements.shallowClone();
            final Set<String> graphGroups = graph.getSchema().getGroups();
            final Iterable<? extends Element> filteredInput = Iterables.filter(addElements.getInput(), element -> graphGroups.contains(null != element ? element.getGroup() : null));
            ((AddElements) resultOp).setInput(filteredInput);
    // May be null: the branches above set resultOp to null when the operation
    // should not be executed against this graph.
    return resultOp;
// Executes an AddElements operation containing a single "BasicEdge" edge against the
// store and expects getElements() to report exactly one element afterwards.
// NOTE(review): the closing brace of this method is missing from this listing.
public void shouldAddEdgesToOneGraph() throws Exception {
    // Given
    AddElements op = new AddElements.Builder().input(new Edge.Builder().group("BasicEdge").source("testSource").dest("testDest").property("property1", 12).build()).build();
    // When
    store.execute(op, userContext);
    // Then
    assertEquals(1, getElements().size());
Example 68 with AddElements

Use of AddElements in project Gaffer by gchq.

In the class AddElementsHandler, method addElements.

// Adds the elements of the given operation to the Parquet store. The visible steps are:
//   1. write the new elements, split by group and partition, to a temporary directory;
//   2. for every group/partition (and every reversed-edge group/partition) run an
//      AggregateAndSortData task over the new data plus the store's existing file;
//   3. move the single resulting Parquet file of each task into a new "-tmp" snapshot directory;
//   4. delete the temporary directory, write the graph partitioner and rename the
//      snapshot directory to drop the "-tmp" suffix.
// Failures are rethrown as OperationException.
// NOTE(review): this listing has lost closing braces, the start of several logging
// calls and possibly other statements — compare with the upstream source.
private void addElements(final AddElements addElementsOperation, final Context context, final ParquetStore store) throws OperationException {
    // Set up
    final FileSystem fs = store.getFS();
    final Schema schema = store.getSchema();
    final SchemaUtils schemaUtils = store.getSchemaUtils();
    final SparkSession spark = SparkContextUtil.getSparkSession(context, store.getProperties());
    final ExecutorService threadPool = createThreadPool(spark, store.getProperties());
    final GraphPartitioner currentGraphPartitioner = store.getGraphPartitioner();
    SparkParquetUtils.configureSparkForAddElements(spark, store.getProperties());
    // Write data from addElementsOperation split by group and partition (NB this uses the existing partitioner -
    // adding elements using this operation does not effect the partitions).
    final String tmpDirectory = store.getTempFilesDir();
    // Path builders for the unsorted, unaggregated new data (normal and reversed-edge layout).
    final BiFunction<String, Integer, String> directoryForGroupAndPartitionId = (group, partitionId) -> tmpDirectory + "/unsorted_unaggregated_new" + "/group=" + group + "/partition=" + partitionId;
    final BiFunction<String, Integer, String> directoryForGroupAndPartitionIdForReversedEdges = (group, partitionId) -> tmpDirectory + "/unsorted_unaggregated_new" + "/reversed-group=" + group + "/partition=" + partitionId;"Calling WriteUnsortedData to add elements");
    // NOTE(review): the end of the previous line looks like the tail of a logging call
    // whose start was lost; the same damage recurs on later lines that begin with a string literal.
    LOGGER.trace("currentGraphPartitioner is {}", currentGraphPartitioner);
    new WriteUnsortedData(store, currentGraphPartitioner, directoryForGroupAndPartitionId, directoryForGroupAndPartitionIdForReversedEdges).writeElements(addElementsOperation.getInput());
    // For every group and partition, aggregate the new data with the old data and then sort
    final BiFunction<String, Integer, String> directoryForSortedResultsForGroupAndPartitionId = (group, partitionId) -> tmpDirectory + "/sorted_new_old_merged" + "/group=" + group + "/partition=" + partitionId;
    final BiFunction<String, Integer, String> directoryForSortedResultsForGroupAndPartitionIdForReversedEdges = (group, partitionId) -> tmpDirectory + "/sorted_new_old_merged" + "/REVERSED-group=" + group + "/partition=" + partitionId;
    final List<Callable<CallableResult>> tasks = new ArrayList<>();
    // One task per (group, partition): inputs are the newly written directory and the store's existing file.
    for (final String group : schema.getGroups()) {
        final List<Partition> partitions = currentGraphPartitioner.getGroupPartitioner(group).getPartitions();
        for (final Partition partition : partitions) {
            final List<String> inputFiles = new ArrayList<>();
            // New data
            inputFiles.add(directoryForGroupAndPartitionId.apply(group, partition.getPartitionId()));
            // Old data
            inputFiles.add(store.getFile(group, partition));
            final String outputDir = directoryForSortedResultsForGroupAndPartitionId.apply(group, partition.getPartitionId());
            // The boolean argument is false here and true in the reversed-edge loop below;
            // presumably it marks reversed-edge data — TODO confirm against AggregateAndSortData.
            final AggregateAndSortData task = new AggregateAndSortData(schemaUtils, fs, inputFiles, outputDir, group, group + "-" + partition.getPartitionId(), false, store.getProperties().getCompressionCodecName(), spark);
            // NOTE(review): no statement adding the task to `tasks` is visible in this listing — confirm upstream.
  "Created AggregateAndSortData task for group {}, partition {}", group, partition.getPartitionId());
    // Same again for the reversed-edge files of every edge group.
    for (final String group : schema.getEdgeGroups()) {
        final List<Partition> partitions = currentGraphPartitioner.getGroupPartitionerForReversedEdges(group).getPartitions();
        for (final Partition partition : partitions) {
            final List<String> inputFiles = new ArrayList<>();
            // New data
            inputFiles.add(directoryForGroupAndPartitionIdForReversedEdges.apply(group, partition.getPartitionId()));
            // Old data
            inputFiles.add(store.getFileForReversedEdges(group, partition));
            final String outputDir = directoryForSortedResultsForGroupAndPartitionIdForReversedEdges.apply(group, partition.getPartitionId());
            final AggregateAndSortData task = new AggregateAndSortData(schemaUtils, fs, inputFiles, outputDir, group, "reversed-" + group + "-" + partition.getPartitionId(), true, store.getProperties().getCompressionCodecName(), spark);
  "Created AggregateAndSortData task for reversed edge group {}, partition {}", group, partition.getPartitionId());
    // Run all tasks on the thread pool and wait for every result.
    try {"Invoking {} AggregateAndSortData tasks", tasks.size());
        final List<Future<CallableResult>> futures = threadPool.invokeAll(tasks);
        for (final Future<CallableResult> future : futures) {
            final CallableResult result = future.get();
  "Result {} from task", result);
    } catch (final InterruptedException e) {
        // NOTE(review): the interrupt status is not restored in the visible code before
        // rethrowing — consider Thread.currentThread().interrupt().
        throw new OperationException("InterruptedException running AggregateAndSortData tasks", e);
    } catch (final ExecutionException e) {
        throw new OperationException("ExecutionException running AggregateAndSortData tasks", e);
    try {
        // Move results to a new snapshot directory (the -tmp at the end allows us to add data to the directory,
        // and then when this is all finished we rename the directory to remove the -tmp; this allows us to make
        // the replacement of the old data with the new data an atomic operation and ensures that a get operation
        // against the store will not read the directory when only some of the data has been moved there).
        final long snapshot = System.currentTimeMillis();
        final String newDataDir = store.getDataDir() + "/" + ParquetStore.getSnapshotPath(snapshot) + "-tmp";"Moving aggregated and sorted data to new snapshot directory {}", newDataDir);
        fs.mkdirs(new Path(newDataDir));
        // Per-group sub-directories of the new snapshot (normal, then reversed-edge).
        // NOTE(review): only the log-message tail is visible in these two loops; presumably
        // the directory is created here — confirm upstream.
        for (final String group : schema.getGroups()) {
            final Path groupDir = new Path(newDataDir, ParquetStore.getGroupSubDir(group, false));
  "Created directory {}", groupDir);
        for (final String group : schema.getEdgeGroups()) {
            final Path groupDir = new Path(newDataDir, ParquetStore.getGroupSubDir(group, true));
  "Created directory {}", groupDir);
        // Move the sorted result of every (group, partition) into the snapshot directory.
        for (final String group : schema.getGroups()) {
            final String groupDir = newDataDir + "/" + ParquetStore.getGroupSubDir(group, false);
            final List<Partition> partitions = currentGraphPartitioner.getGroupPartitioner(group).getPartitions();
            for (final Partition partition : partitions) {
                final Path outputDir = new Path(directoryForSortedResultsForGroupAndPartitionId.apply(group, partition.getPartitionId()));
                if (!fs.exists(outputDir)) {
          "Not moving data for group {}, partition id {} as the outputDir {} does not exist", group, partition.getPartitionId(), outputDir);
                } else {
                    // One .parquet file and one .parquet.crc file
                    // The filter keeps only names ending in ".parquet", so the .crc file is not counted.
                    final FileStatus[] status = fs.listStatus(outputDir, path -> path.getName().endsWith(".parquet"));
                    if (1 != status.length) {
                        LOGGER.error("Didn't find one Parquet file in path {} (found {} files)", outputDir, status.length);
                        throw new OperationException("Expected to find one Parquet file in path " + outputDir + " (found " + status.length + " files)");
                    } else {
                        final Path destination = new Path(groupDir, ParquetStore.getFile(partition.getPartitionId()));
              "Renaming {} to {}", status[0].getPath(), destination);
                        fs.rename(status[0].getPath(), destination);
        // Same move for the reversed-edge results of every edge group.
        for (final String group : schema.getEdgeGroups()) {
            final String groupDir = newDataDir + "/" + ParquetStore.getGroupSubDir(group, true);
            final List<Partition> partitions = currentGraphPartitioner.getGroupPartitionerForReversedEdges(group).getPartitions();
            for (final Partition partition : partitions) {
                final Path outputDir = new Path(directoryForSortedResultsForGroupAndPartitionIdForReversedEdges.apply(group, partition.getPartitionId()));
                if (!fs.exists(outputDir)) {
          "Not moving data for reversed edge group {}, partition id {} as the outputDir {} does not exist", group, partition.getPartitionId(), outputDir);
                } else {
                    // One .parquet file and one .parquet.crc file
                    final FileStatus[] status = fs.listStatus(outputDir, path -> path.getName().endsWith(".parquet"));
                    if (1 != status.length) {
                        LOGGER.error("Didn't find one Parquet file in path {} (found {} files)", outputDir, status.length);
                        throw new OperationException("Expected to find one Parquet file in path " + outputDir + " (found " + status.length + " files)");
                    } else {
                        final Path destination = new Path(groupDir, ParquetStore.getFile(partition.getPartitionId()));
              "Renaming {} to {}", status[0].getPath(), destination);
                        fs.rename(status[0].getPath(), destination);
        // Delete temporary data directory"Deleting temporary directory {}", tmpDirectory);
        fs.delete(new Path(tmpDirectory), true);
        // Write out graph partitioner (unchanged from previous one)
        final Path newGraphPartitionerPath = new Path(newDataDir + "/graphPartitioner");
        // NOTE(review): no close of this stream is visible in this listing — confirm it is closed upstream.
        final FSDataOutputStream stream = fs.create(newGraphPartitionerPath);"Writing graph partitioner to {}", newGraphPartitionerPath);
        new GraphPartitionerSerialiser().write(currentGraphPartitioner, stream);
        // Move snapshot-tmp directory to snapshot
        final String directoryWithoutTmp = newDataDir.substring(0, newDataDir.lastIndexOf("-tmp"));"Renaming {} to {}", newDataDir, directoryWithoutTmp);
        fs.rename(new Path(newDataDir), new Path(directoryWithoutTmp));
        // NOTE(review): the statement that records the new snapshot on the store is not
        // visible below; only its comment and a log-message tail remain — confirm upstream.
        // Set snapshot on store to new value"Updating latest snapshot on store to {}", snapshot);
    } catch (final IOException | StoreException e) {
        // NOTE(review): the message names only IOException although StoreException is caught too.
        throw new OperationException("IOException moving results files into new snapshot directory", e);
Also used : ParquetStoreProperties( StoreException( FileSystem(org.apache.hadoop.fs.FileSystem) AggregateAndSortData( BiFunction(java.util.function.BiFunction) LoggerFactory(org.slf4j.LoggerFactory) Callable(java.util.concurrent.Callable) FileStatus(org.apache.hadoop.fs.FileStatus) ParquetStore( ArrayList(java.util.ArrayList) FSDataOutputStream(org.apache.hadoop.fs.FSDataOutputStream) Future(java.util.concurrent.Future) SparkParquetUtils( Path(org.apache.hadoop.fs.Path) ExecutorService(java.util.concurrent.ExecutorService) SparkSession(org.apache.spark.sql.SparkSession) Logger(org.slf4j.Logger) Partition( SparkContextUtil( SchemaUtils( IOException( Option(scala.Option) Executors(java.util.concurrent.Executors) ExecutionException(java.util.concurrent.ExecutionException) Store( List(java.util.List) WriteUnsortedData( GraphPartitionerSerialiser( Context( Schema( GraphPartitioner( AddElements( OperationException( OperationHandler( CallableResult( SparkSession(org.apache.spark.sql.SparkSession) FileStatus(org.apache.hadoop.fs.FileStatus) Schema( ArrayList(java.util.ArrayList) AggregateAndSortData( Callable(java.util.concurrent.Callable) SchemaUtils( GraphPartitioner( FileSystem(org.apache.hadoop.fs.FileSystem) FSDataOutputStream(org.apache.hadoop.fs.FSDataOutputStream) ExecutionException(java.util.concurrent.ExecutionException) OperationException( Path(org.apache.hadoop.fs.Path) GraphPartitionerSerialiser( Partition( WriteUnsortedData( IOException( StoreException( ExecutorService(java.util.concurrent.ExecutorService) Future(java.util.concurrent.Future) CallableResult(

// Adds the test elements to a graph, then runs GetElements seeded with
// EdgeSeed("A", "B0", true) and a view restricted to BASIC_EDGE1 whose transformer
// applies ExampleTransform to the COUNT property.
// NOTE(review): closing braces and the statements that fill resultsSet and
// expectedResults are not visible in this listing — compare with the upstream test.
public void testGetElementsByEdgeIdWithViewRestrictedByGroupAndATransform() throws OperationException {
    // Given
    final Graph graph = GetAllElementsHandlerTest.getGraph();
    final AddElements addElements = new AddElements.Builder().input(getElements()).build();
    graph.execute(addElements, new User());
    // When
    final GetElements getElements = new GetElements.Builder().input(new EdgeSeed("A", "B0", true)).view(new View.Builder().edge(GetAllElementsHandlerTest.BASIC_EDGE1, new ViewElementDefinition.Builder().transformer(new ElementTransformer.Builder().select(GetAllElementsHandlerTest.COUNT).execute(new ExampleTransform()).project(GetAllElementsHandlerTest.COUNT).build()).build()).build()).build();
    final CloseableIterable<? extends Element> results = graph.execute(getElements, new User());
    // Then
    final Set<Element> resultsSet = new HashSet<>();
    final Set<Element> expectedResults = new HashSet<>();
    // Expected elements: entities on vertex "A" or "B0" and edges from "A" to "B0" ...
    getElements().stream().filter(element -> {
        if (element instanceof Entity) {
            return ((Entity) element).getVertex().equals("A") || ((Entity) element).getVertex().equals("B0");
        } else {
            final Edge edge = (Edge) element;
            return edge.getSource().equals("A") && edge.getDestination().equals("B0");
    // ... narrowed to the BASIC_EDGE1 group, with COUNT increased by ExampleTransform.INCREMENT_BY.
    }).filter(e -> e.getGroup().equals(GetAllElementsHandlerTest.BASIC_EDGE1)).map(element -> {
        element.putProperty(GetAllElementsHandlerTest.COUNT, ((Integer) element.getProperty(GetAllElementsHandlerTest.COUNT)) + ExampleTransform.INCREMENT_BY);
        return element;
    assertEquals(expectedResults, resultsSet);
// Adds the test elements to a graph, then runs GetElements seeded with EntitySeed("A")
// and a view restricted to the BASIC_EDGE1 edge group.
// NOTE(review): closing braces and the statements that fill resultsSet and
// expectedResults are not visible in this listing — compare with the upstream test.
public void testGetElementsByEntityIdWithViewRestrictedByGroup() throws OperationException {
    // Given
    final Graph graph = GetAllElementsHandlerTest.getGraph();
    final AddElements addElements = new AddElements.Builder().input(getElements()).build();
    graph.execute(addElements, new User());
    // When
    final GetElements getElements = new GetElements.Builder().input(new EntitySeed("A")).view(new View.Builder().edge(GetAllElementsHandlerTest.BASIC_EDGE1).build()).build();
    final CloseableIterable<? extends Element> results = graph.execute(getElements, new User());
    // Then
    final Set<Element> resultsSet = new HashSet<>();
    final Set<Element> expectedResults = new HashSet<>();
    // Expected elements: BASIC_EDGE1 elements that are entities on vertex "A" or
    // edges with "A" as source or destination.
    getElements().stream().filter(element -> element.getGroup().equals(GetAllElementsHandlerTest.BASIC_EDGE1)).filter(element -> {
        if (element instanceof Entity) {
            return ((Entity) element).getVertex().equals("A");
        } else {
            final Edge edge = (Edge) element;
            return edge.getSource().equals("A") || edge.getDestination().equals("A");
    assertEquals(expectedResults, resultsSet);
