Search in sources :

Example 11 with GraphPartitioner

use of uk.gov.gchq.gaffer.parquetstore.partitioner.GraphPartitioner in project Gaffer by gchq.

In the class AddElementsFromRDD, the method calculateAndWritePartitioner:

/**
 * Calculates the new graph partitioner and writes it to file.
 * <p>
 * The partitioner is computed by {@link CalculatePartitioner} over the
 * directory returned by {@code getSortedAggregatedDirectory(true, true)} and
 * is then serialised to a file named {@code graphPartitioner} whose path is
 * formed by appending that name to the same directory string.
 *
 * @throws OperationException if an {@link IOException} is thrown while
 *                            calculating or writing the partitioner
 */
private void calculateAndWritePartitioner() throws OperationException {
    // Create new graph partitioner
    LOGGER.info("Calculating new GraphPartitioner");
    final GraphPartitioner newPartitioner;
    try {
        newPartitioner = new CalculatePartitioner(new Path(getSortedAggregatedDirectory(true, true)), store.getSchema(), fs).call();
    } catch (final IOException e) {
        throw new OperationException("IOException calculating new graph partitioner", e);
    }
    LOGGER.info("New GraphPartitioner has partitions for {} groups, {} reversed edge groups", newPartitioner.getGroups().size(), newPartitioner.getGroupsForReversedEdges().size());
    // Write out graph partitioner
    // Declared outside the try so the catch block can report the path (null if building the path itself failed).
    Path newGraphPartitionerPath = null;
    try {
        newGraphPartitionerPath = new Path(getSortedAggregatedDirectory(true, true) + "graphPartitioner");
        // try-with-resources guarantees the stream is closed even if serialisation fails part-way through
        try (final FSDataOutputStream stream = fs.create(newGraphPartitionerPath)) {
            LOGGER.info("Writing graph partitioner to {}", newGraphPartitionerPath);
            new GraphPartitionerSerialiser().write(newPartitioner, stream);
        }
    } catch (final IOException e) {
        throw new OperationException("IOException writing out graph partitioner to " + newGraphPartitionerPath, e);
    }
}
Also used : Path(org.apache.hadoop.fs.Path) GraphPartitionerSerialiser(uk.gov.gchq.gaffer.parquetstore.partitioner.serialisation.GraphPartitionerSerialiser) GraphPartitioner(uk.gov.gchq.gaffer.parquetstore.partitioner.GraphPartitioner) CalculatePartitioner(uk.gov.gchq.gaffer.parquetstore.operation.handler.utilities.CalculatePartitioner) IOException(java.io.IOException) FSDataOutputStream(org.apache.hadoop.fs.FSDataOutputStream) OperationException(uk.gov.gchq.gaffer.operation.OperationException)

Example 12 with GraphPartitioner

use of uk.gov.gchq.gaffer.parquetstore.partitioner.GraphPartitioner in project Gaffer by gchq.

In the class ParquetStore, the method loadGraphPartitioner:

/**
 * Loads the graph partitioner for the latest snapshot in the data directory,
 * creating and persisting it first if no partitioner file exists yet.
 * <p>
 * Sets {@code currentSnapshot} to the value returned by
 * {@code getLatestSnapshot(dataDir)}. If the path returned by
 * {@code getGraphPartitionerPath()} does not exist, a partitioner is
 * calculated from the snapshot directory using {@link CalculatePartitioner}
 * and written to that path. The partitioner is then loaded by delegating to
 * {@code loadGraphPartitioner(Path)}.
 *
 * @throws StoreException if the data directory does not exist, or if an
 *                        {@link IOException} occurs while checking, writing
 *                        or loading the partitioner
 */
private void loadGraphPartitioner() throws StoreException {
    final String dataDir = getDataDir();
    try {
        if (fs.exists(new Path(dataDir))) {
            this.currentSnapshot = getLatestSnapshot(dataDir);
            LOGGER.info("Setting currentSnapshot to {}", this.currentSnapshot);
            final Path path = getGraphPartitionerPath();
            if (!fs.exists(path)) {
                LOGGER.info("Graph partitioner does not exist in {} so creating it", path);
                final GraphPartitioner partitioner = new CalculatePartitioner(new Path(dataDir + "/" + getSnapshotPath(this.currentSnapshot)), getSchema(), fs).call();
                LOGGER.info("Writing graph partitioner to {}", path);
                // try-with-resources guarantees the stream is closed even if serialisation fails part-way through
                try (final FSDataOutputStream stream = fs.create(path)) {
                    new GraphPartitionerSerialiser().write(partitioner, stream);
                }
            }
            LOGGER.info("Loading graph partitioner from path {}", path);
            loadGraphPartitioner(path);
        } else {
            throw new StoreException("Data directory " + dataDir + " does not exist - store is in an inconsistent state");
        }
    } catch (final IOException e) {
        throw new StoreException(e.getMessage(), e);
    }
}
Also used : Path(org.apache.hadoop.fs.Path) GraphPartitionerSerialiser(uk.gov.gchq.gaffer.parquetstore.partitioner.serialisation.GraphPartitionerSerialiser) GraphPartitioner(uk.gov.gchq.gaffer.parquetstore.partitioner.GraphPartitioner) CalculatePartitioner(uk.gov.gchq.gaffer.parquetstore.operation.handler.utilities.CalculatePartitioner) FSDataOutputStream(org.apache.hadoop.fs.FSDataOutputStream) IOException(java.io.IOException) StoreException(uk.gov.gchq.gaffer.store.StoreException)

Aggregations

GraphPartitioner (uk.gov.gchq.gaffer.parquetstore.partitioner.GraphPartitioner)12 Path (org.apache.hadoop.fs.Path)10 ArrayList (java.util.ArrayList)9 IOException (java.io.IOException)8 FileSystem (org.apache.hadoop.fs.FileSystem)7 GroupPartitioner (uk.gov.gchq.gaffer.parquetstore.partitioner.GroupPartitioner)7 List (java.util.List)6 FileStatus (org.apache.hadoop.fs.FileStatus)6 PartitionKey (uk.gov.gchq.gaffer.parquetstore.partitioner.PartitionKey)6 Arrays (java.util.Arrays)5 Configuration (org.apache.hadoop.conf.Configuration)5 Test (org.junit.jupiter.api.Test)5 Element (uk.gov.gchq.gaffer.data.element.Element)5 OperationException (uk.gov.gchq.gaffer.operation.OperationException)5 BiFunction (java.util.function.BiFunction)4 Collectors (java.util.stream.Collectors)4 FSDataOutputStream (org.apache.hadoop.fs.FSDataOutputStream)4 WriteUnsortedData (uk.gov.gchq.gaffer.parquetstore.operation.handler.utilities.WriteUnsortedData)4 GraphPartitionerSerialiser (uk.gov.gchq.gaffer.parquetstore.partitioner.serialisation.GraphPartitionerSerialiser)4 ParseException (java.text.ParseException)3