Example 36 with StoreException

Use of uk.gov.gchq.gaffer.store.StoreException in project Gaffer by gchq.

The class AddElementsHandler, method addElements:

private void addElements(final AddElements addElementsOperation, final Context context, final ParquetStore store) throws OperationException {
    // Set up
    final FileSystem fs = store.getFS();
    final Schema schema = store.getSchema();
    final SchemaUtils schemaUtils = store.getSchemaUtils();
    final SparkSession spark = SparkContextUtil.getSparkSession(context, store.getProperties());
    final ExecutorService threadPool = createThreadPool(spark, store.getProperties());
    final GraphPartitioner currentGraphPartitioner = store.getGraphPartitioner();
    SparkParquetUtils.configureSparkForAddElements(spark, store.getProperties());
    // Write data from addElementsOperation split by group and partition (NB this uses the existing partitioner -
    // adding elements using this operation does not affect the partitions).
    final String tmpDirectory = store.getTempFilesDir();
    final BiFunction<String, Integer, String> directoryForGroupAndPartitionId = (group, partitionId) -> tmpDirectory + "/unsorted_unaggregated_new" + "/group=" + group + "/partition=" + partitionId;
    final BiFunction<String, Integer, String> directoryForGroupAndPartitionIdForReversedEdges = (group, partitionId) -> tmpDirectory + "/unsorted_unaggregated_new" + "/reversed-group=" + group + "/partition=" + partitionId;
    LOGGER.info("Calling WriteUnsortedData to add elements");
    LOGGER.trace("currentGraphPartitioner is {}", currentGraphPartitioner);
    new WriteUnsortedData(store, currentGraphPartitioner, directoryForGroupAndPartitionId, directoryForGroupAndPartitionIdForReversedEdges).writeElements(addElementsOperation.getInput());
    // For every group and partition, aggregate the new data with the old data and then sort
    final BiFunction<String, Integer, String> directoryForSortedResultsForGroupAndPartitionId = (group, partitionId) -> tmpDirectory + "/sorted_new_old_merged" + "/group=" + group + "/partition=" + partitionId;
    final BiFunction<String, Integer, String> directoryForSortedResultsForGroupAndPartitionIdForReversedEdges = (group, partitionId) -> tmpDirectory + "/sorted_new_old_merged" + "/reversed-group=" + group + "/partition=" + partitionId;
    final List<Callable<CallableResult>> tasks = new ArrayList<>();
    for (final String group : schema.getGroups()) {
        final List<Partition> partitions = currentGraphPartitioner.getGroupPartitioner(group).getPartitions();
        for (final Partition partition : partitions) {
            final List<String> inputFiles = new ArrayList<>();
            // New data
            inputFiles.add(directoryForGroupAndPartitionId.apply(group, partition.getPartitionId()));
            // Old data
            inputFiles.add(store.getFile(group, partition));
            final String outputDir = directoryForSortedResultsForGroupAndPartitionId.apply(group, partition.getPartitionId());
            final AggregateAndSortData task = new AggregateAndSortData(schemaUtils, fs, inputFiles, outputDir, group, group + "-" + partition.getPartitionId(), false, store.getProperties().getCompressionCodecName(), spark);
            tasks.add(task);
            LOGGER.info("Created AggregateAndSortData task for group {}, partition {}", group, partition.getPartitionId());
        }
    }
    for (final String group : schema.getEdgeGroups()) {
        final List<Partition> partitions = currentGraphPartitioner.getGroupPartitionerForReversedEdges(group).getPartitions();
        for (final Partition partition : partitions) {
            final List<String> inputFiles = new ArrayList<>();
            // New data
            inputFiles.add(directoryForGroupAndPartitionIdForReversedEdges.apply(group, partition.getPartitionId()));
            // Old data
            inputFiles.add(store.getFileForReversedEdges(group, partition));
            final String outputDir = directoryForSortedResultsForGroupAndPartitionIdForReversedEdges.apply(group, partition.getPartitionId());
            final AggregateAndSortData task = new AggregateAndSortData(schemaUtils, fs, inputFiles, outputDir, group, "reversed-" + group + "-" + partition.getPartitionId(), true, store.getProperties().getCompressionCodecName(), spark);
            tasks.add(task);
            LOGGER.info("Created AggregateAndSortData task for reversed edge group {}, partition {}", group, partition.getPartitionId());
        }
    }
    try {
        LOGGER.info("Invoking {} AggregateAndSortData tasks", tasks.size());
        final List<Future<CallableResult>> futures = threadPool.invokeAll(tasks);
        for (final Future<CallableResult> future : futures) {
            final CallableResult result = future.get();
            LOGGER.info("Result {} from task", result);
        }
    } catch (final InterruptedException e) {
        throw new OperationException("InterruptedException running AggregateAndSortData tasks", e);
    } catch (final ExecutionException e) {
        throw new OperationException("ExecutionException running AggregateAndSortData tasks", e);
    }
    try {
        // Move results to a new snapshot directory (the -tmp at the end allows us to add data to the directory,
        // and then when this is all finished we rename the directory to remove the -tmp; this allows us to make
        // the replacement of the old data with the new data an atomic operation and ensures that a get operation
        // against the store will not read the directory when only some of the data has been moved there).
        final long snapshot = System.currentTimeMillis();
        final String newDataDir = store.getDataDir() + "/" + ParquetStore.getSnapshotPath(snapshot) + "-tmp";
        LOGGER.info("Moving aggregated and sorted data to new snapshot directory {}", newDataDir);
        fs.mkdirs(new Path(newDataDir));
        for (final String group : schema.getGroups()) {
            final Path groupDir = new Path(newDataDir, ParquetStore.getGroupSubDir(group, false));
            fs.mkdirs(groupDir);
            LOGGER.info("Created directory {}", groupDir);
        }
        for (final String group : schema.getEdgeGroups()) {
            final Path groupDir = new Path(newDataDir, ParquetStore.getGroupSubDir(group, true));
            fs.mkdirs(groupDir);
            LOGGER.info("Created directory {}", groupDir);
        }
        for (final String group : schema.getGroups()) {
            final String groupDir = newDataDir + "/" + ParquetStore.getGroupSubDir(group, false);
            final List<Partition> partitions = currentGraphPartitioner.getGroupPartitioner(group).getPartitions();
            for (final Partition partition : partitions) {
                final Path outputDir = new Path(directoryForSortedResultsForGroupAndPartitionId.apply(group, partition.getPartitionId()));
                if (!fs.exists(outputDir)) {
                    LOGGER.info("Not moving data for group {}, partition id {} as the outputDir {} does not exist", group, partition.getPartitionId(), outputDir);
                } else {
                    // One .parquet file and one .parquet.crc file
                    final FileStatus[] status = fs.listStatus(outputDir, path -> path.getName().endsWith(".parquet"));
                    if (1 != status.length) {
                        LOGGER.error("Didn't find one Parquet file in path {} (found {} files)", outputDir, status.length);
                        throw new OperationException("Expected to find one Parquet file in path " + outputDir + " (found " + status.length + " files)");
                    } else {
                        final Path destination = new Path(groupDir, ParquetStore.getFile(partition.getPartitionId()));
                        LOGGER.info("Renaming {} to {}", status[0].getPath(), destination);
                        fs.rename(status[0].getPath(), destination);
                    }
                }
            }
        }
        for (final String group : schema.getEdgeGroups()) {
            final String groupDir = newDataDir + "/" + ParquetStore.getGroupSubDir(group, true);
            final List<Partition> partitions = currentGraphPartitioner.getGroupPartitionerForReversedEdges(group).getPartitions();
            for (final Partition partition : partitions) {
                final Path outputDir = new Path(directoryForSortedResultsForGroupAndPartitionIdForReversedEdges.apply(group, partition.getPartitionId()));
                if (!fs.exists(outputDir)) {
                    LOGGER.info("Not moving data for reversed edge group {}, partition id {} as the outputDir {} does not exist", group, partition.getPartitionId(), outputDir);
                } else {
                    // One .parquet file and one .parquet.crc file
                    final FileStatus[] status = fs.listStatus(outputDir, path -> path.getName().endsWith(".parquet"));
                    if (1 != status.length) {
                        LOGGER.error("Didn't find one Parquet file in path {} (found {} files)", outputDir, status.length);
                        throw new OperationException("Expected to find one Parquet file in path " + outputDir + " (found " + status.length + " files)");
                    } else {
                        final Path destination = new Path(groupDir, ParquetStore.getFile(partition.getPartitionId()));
                        LOGGER.info("Renaming {} to {}", status[0].getPath(), destination);
                        fs.rename(status[0].getPath(), destination);
                    }
                }
            }
        }
        // Delete temporary data directory
        LOGGER.info("Deleting temporary directory {}", tmpDirectory);
        fs.delete(new Path(tmpDirectory), true);
        // Write out graph partitioner (unchanged from previous one)
        final Path newGraphPartitionerPath = new Path(newDataDir + "/graphPartitioner");
        LOGGER.info("Writing graph partitioner to {}", newGraphPartitionerPath);
        try (final FSDataOutputStream stream = fs.create(newGraphPartitionerPath)) {
            new GraphPartitionerSerialiser().write(currentGraphPartitioner, stream);
        }
        // Move snapshot-tmp directory to snapshot
        final String directoryWithoutTmp = newDataDir.substring(0, newDataDir.lastIndexOf("-tmp"));
        LOGGER.info("Renaming {} to {}", newDataDir, directoryWithoutTmp);
        fs.rename(new Path(newDataDir), new Path(directoryWithoutTmp));
        // Set snapshot on store to new value
        LOGGER.info("Updating latest snapshot on store to {}", snapshot);
        store.setLatestSnapshot(snapshot);
    } catch (final IOException | StoreException e) {
        throw new OperationException("IOException moving results files into new snapshot directory", e);
    }
}
Also used : ParquetStoreProperties(uk.gov.gchq.gaffer.parquetstore.ParquetStoreProperties) StoreException(uk.gov.gchq.gaffer.store.StoreException) FileSystem(org.apache.hadoop.fs.FileSystem) AggregateAndSortData(uk.gov.gchq.gaffer.parquetstore.operation.handler.utilities.AggregateAndSortData) BiFunction(java.util.function.BiFunction) LoggerFactory(org.slf4j.LoggerFactory) Callable(java.util.concurrent.Callable) FileStatus(org.apache.hadoop.fs.FileStatus) ParquetStore(uk.gov.gchq.gaffer.parquetstore.ParquetStore) ArrayList(java.util.ArrayList) FSDataOutputStream(org.apache.hadoop.fs.FSDataOutputStream) Future(java.util.concurrent.Future) SparkParquetUtils(uk.gov.gchq.gaffer.parquetstore.utils.SparkParquetUtils) Path(org.apache.hadoop.fs.Path) ExecutorService(java.util.concurrent.ExecutorService) SparkSession(org.apache.spark.sql.SparkSession) Logger(org.slf4j.Logger) Partition(uk.gov.gchq.gaffer.parquetstore.partitioner.Partition) SparkContextUtil(uk.gov.gchq.gaffer.spark.SparkContextUtil) SchemaUtils(uk.gov.gchq.gaffer.parquetstore.utils.SchemaUtils) IOException(java.io.IOException) Option(scala.Option) Executors(java.util.concurrent.Executors) ExecutionException(java.util.concurrent.ExecutionException) Store(uk.gov.gchq.gaffer.store.Store) List(java.util.List) WriteUnsortedData(uk.gov.gchq.gaffer.parquetstore.operation.handler.utilities.WriteUnsortedData) GraphPartitionerSerialiser(uk.gov.gchq.gaffer.parquetstore.partitioner.serialisation.GraphPartitionerSerialiser) Context(uk.gov.gchq.gaffer.store.Context) Schema(uk.gov.gchq.gaffer.store.schema.Schema) GraphPartitioner(uk.gov.gchq.gaffer.parquetstore.partitioner.GraphPartitioner) AddElements(uk.gov.gchq.gaffer.operation.impl.add.AddElements) OperationException(uk.gov.gchq.gaffer.operation.OperationException) OperationHandler(uk.gov.gchq.gaffer.store.operation.handler.OperationHandler) CallableResult(uk.gov.gchq.gaffer.parquetstore.operation.handler.utilities.CallableResult)
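
The comment in the middle of this method is worth dwelling on: the new snapshot is assembled under a directory ending in "-tmp", and only once every file is in place is the directory renamed to its final name. Because a rename is a single metadata operation on HDFS, concurrent readers see either the complete old snapshot or the complete new one, never a half-populated directory. Below is a minimal, self-contained sketch of that commit-by-rename pattern; the class name, the publishSnapshot method, and the "snapshot=" path format are illustrative, not Gaffer's actual API.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

import java.io.IOException;

public class SnapshotSwapSketch {

    // Assemble new data under "<dataDir>/snapshot=<ts>-tmp" and commit it with a
    // single rename, so readers never observe a partially written snapshot.
    public static void publishSnapshot(final FileSystem fs, final String dataDir) throws IOException {
        final long snapshot = System.currentTimeMillis();
        final Path tmpDir = new Path(dataDir + "/snapshot=" + snapshot + "-tmp");
        fs.mkdirs(tmpDir);
        // ... write every output file into tmpDir here ...
        final Path finalDir = new Path(dataDir + "/snapshot=" + snapshot);
        // The rename is the commit point: one atomic metadata operation on HDFS.
        if (!fs.rename(tmpDir, finalDir)) {
            throw new IOException("Failed to rename " + tmpDir + " to " + finalDir);
        }
    }

    public static void main(final String[] args) throws IOException {
        final FileSystem fs = FileSystem.get(new Configuration());
        publishSnapshot(fs, "/tmp/example-data");
    }
}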

Example 37 with StoreException

Use of uk.gov.gchq.gaffer.store.StoreException in project Gaffer by gchq.

The class ElementClonerTest, method testElementCloner:

@Test
public void testElementCloner() throws StoreException {
    // Given
    final ElementCloner cloner = new ElementCloner();
    final MapStore mapStore = new MapStore();
    mapStore.initialise("graphId", GetAllElementsHandlerTest.getSchema(), new MapStoreProperties());
    // Then
    Streams.toStream(GetAllElementsHandlerTest.getElements())
            .map(element -> new Pair<>(element, cloner.cloneElement(element, mapStore.getSchema())))
            .forEach(pair -> assertEquals(pair.getFirst(), pair.getSecond()));
}
Also used : Test(org.junit.jupiter.api.Test) StoreException(uk.gov.gchq.gaffer.store.StoreException) Pair(uk.gov.gchq.gaffer.commonutil.pair.Pair) MapStoreProperties(uk.gov.gchq.gaffer.mapstore.MapStoreProperties) MapStore(uk.gov.gchq.gaffer.mapstore.MapStore) GetAllElementsHandlerTest(uk.gov.gchq.gaffer.mapstore.impl.GetAllElementsHandlerTest) Assertions.assertEquals(org.junit.jupiter.api.Assertions.assertEquals) Streams(uk.gov.gchq.gaffer.commonutil.stream.Streams)

Example 38 with StoreException

Use of uk.gov.gchq.gaffer.store.StoreException in project Gaffer by gchq.

The class ProxyStore, method handleResponse:

protected <OUTPUT> OUTPUT handleResponse(final Response response, final TypeReference<OUTPUT> outputTypeReference) throws StoreException {
    final String outputJson = response.hasEntity() ? response.readEntity(String.class) : null;
    if (200 != response.getStatus() && 204 != response.getStatus()) {
        LOGGER.warn("Gaffer bad status " + response.getStatus());
        LOGGER.warn("Detail: " + outputJson);
        throw new StoreException("Delegate Gaffer store returned status: " + response.getStatus() + ". Response content was: " + outputJson);
    }
    OUTPUT output = null;
    if (null != outputJson) {
        try {
            output = deserialise(outputJson, outputTypeReference);
        } catch (final SerialisationException e) {
            throw new StoreException(e.getMessage(), e);
        }
    }
    return output;
}
Also used : SerialisationException(uk.gov.gchq.gaffer.exception.SerialisationException) StoreException(uk.gov.gchq.gaffer.store.StoreException)
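
handleResponse treats 200 and 204 as success, reads the entity body as a string, and deserialises it only when a body is present. The sketch below reproduces that pattern with plain JAX-RS and Jackson; the endpoint URL is hypothetical, and IOException stands in for Gaffer's StoreException so the example stays self-contained.

import com.fasterxml.jackson.core.type.TypeReference;
import com.fasterxml.jackson.databind.ObjectMapper;

import javax.ws.rs.client.Client;
import javax.ws.rs.client.ClientBuilder;
import javax.ws.rs.core.Response;

import java.io.IOException;
import java.util.List;

public class ResponseHandlingSketch {

    private static final ObjectMapper MAPPER = new ObjectMapper();

    // Mirrors handleResponse: accept 200 and 204, fail loudly on anything else,
    // and deserialise the body only if the response carried one.
    static <T> T handle(final Response response, final TypeReference<T> type) throws IOException {
        final String body = response.hasEntity() ? response.readEntity(String.class) : null;
        final int status = response.getStatus();
        if (200 != status && 204 != status) {
            throw new IOException("Remote store returned status " + status + ". Response content was: " + body);
        }
        return null == body ? null : MAPPER.readValue(body, type);
    }

    public static void main(final String[] args) throws IOException {
        final Client client = ClientBuilder.newClient();
        try {
            final Response response = client.target("http://localhost:8080/rest/v2/graph/operations").request().get();
            final List<String> operations = handle(response, new TypeReference<List<String>>() { });
            System.out.println(operations);
        } finally {
            client.close();
        }
    }
}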

Example 39 with StoreException

Use of uk.gov.gchq.gaffer.store.StoreException in project Gaffer by gchq.

The class ProxyStore, method doGet:

protected <OUTPUT> OUTPUT doGet(final URL url, final TypeReference<OUTPUT> outputTypeReference, final Context context) throws StoreException {
    final Invocation.Builder request = createRequest(null, url, context);
    final Response response;
    try {
        response = request.get();
    } catch (final Exception e) {
        throw new StoreException("Request failed to execute via url " + url.toExternalForm(), e);
    }
    return handleResponse(response, outputTypeReference);
}
Also used : Response(javax.ws.rs.core.Response) Builder(javax.ws.rs.client.Invocation.Builder) Invocation(javax.ws.rs.client.Invocation) StoreException(uk.gov.gchq.gaffer.store.StoreException) SerialisationException(uk.gov.gchq.gaffer.exception.SerialisationException) OperationException(uk.gov.gchq.gaffer.operation.OperationException) UnsupportedEncodingException(java.io.UnsupportedEncodingException)

Example 40 with StoreException

Use of uk.gov.gchq.gaffer.store.StoreException in project Gaffer by gchq.

The class SingleUseMockAccumuloProxyStore, method startMockAccumuloRestApi:

protected void startMockAccumuloRestApi(final Schema accumuloSchema) throws StoreException {
    try {
        testFolder.delete();
        testFolder.create();
    } catch (final IOException e) {
        throw new StoreException("Unable to create temporary folder", e);
    }
    final StoreProperties accumuloStoreProperties = StoreProperties.loadStoreProperties(StreamUtil.openStream(getClass(), "accumulo-store.properties"));
    try {
        RestApiTestUtil.reinitialiseGraph(testFolder, accumuloSchema, accumuloStoreProperties);
    } catch (final IOException e) {
        throw new StoreException("Unable to reinitialise delegate graph", e);
    }
}
Also used : IOException(java.io.IOException) StoreProperties(uk.gov.gchq.gaffer.store.StoreProperties) StoreException(uk.gov.gchq.gaffer.store.StoreException)

Aggregations

StoreException (uk.gov.gchq.gaffer.store.StoreException): 70 usages
OperationException (uk.gov.gchq.gaffer.operation.OperationException): 26 usages
IOException (java.io.IOException): 21 usages
Path (org.apache.hadoop.fs.Path): 11 usages
Schema (uk.gov.gchq.gaffer.store.schema.Schema): 11 usages
HashSet (java.util.HashSet): 10 usages
AccumuloSecurityException (org.apache.accumulo.core.client.AccumuloSecurityException): 10 usages
Element (uk.gov.gchq.gaffer.data.element.Element): 10 usages
UnsupportedEncodingException (java.io.UnsupportedEncodingException): 9 usages
TableNotFoundException (org.apache.accumulo.core.client.TableNotFoundException): 9 usages
IteratorSettingException (uk.gov.gchq.gaffer.accumulostore.key.exception.IteratorSettingException): 9 usages
SerialisationException (uk.gov.gchq.gaffer.exception.SerialisationException): 9 usages
ArrayList (java.util.ArrayList): 8 usages
AccumuloException (org.apache.accumulo.core.client.AccumuloException): 8 usages
Configuration (org.apache.hadoop.conf.Configuration): 8 usages
Test (org.junit.jupiter.api.Test): 8 usages
User (uk.gov.gchq.gaffer.user.User): 8 usages
Set (java.util.Set): 6 usages
IteratorSetting (org.apache.accumulo.core.client.IteratorSetting): 6 usages
FileSystem (org.apache.hadoop.fs.FileSystem): 6 usages
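
The counts above show the dominant pattern across these examples: StoreException is a checked, store-level exception, and OperationException appears alongside it in 26 of the 70 files, often because store failures are caught and rethrown at the operation-handler boundary, as in Example 36. A minimal sketch of that boundary, using only the two Gaffer exception types already shown above (readFromStore and handleOperation are illustrative names, not Gaffer API):

import uk.gov.gchq.gaffer.operation.OperationException;
import uk.gov.gchq.gaffer.store.StoreException;

public class ExceptionBoundarySketch {

    // A hypothetical store-level call that fails with the checked StoreException.
    static String readFromStore() throws StoreException {
        throw new StoreException("store unavailable");
    }

    // Operation handlers translate store failures into OperationException,
    // keeping the original exception as the cause so no detail is lost.
    static String handleOperation() throws OperationException {
        try {
            return readFromStore();
        } catch (final StoreException e) {
            throw new OperationException("Failed to read from store", e);
        }
    }

    public static void main(final String[] args) {
        try {
            handleOperation();
        } catch (final OperationException e) {
            System.out.println(e.getMessage() + " (caused by: " + e.getCause().getMessage() + ")");
        }
    }
}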