Examples with GroupPartitioner - uk.gov.gchq.gaffer.parquetstore.partitioner.GroupPartitioner

Example 1 with GroupPartitioner

Use of uk.gov.gchq.gaffer.parquetstore.partitioner.GroupPartitioner in the Gaffer project by gchq.

From the class CalculatePartitionerTest, method calculatePartitionerTest.

/**
 * Checks that the {@link GraphPartitioner} derived by {@code CalculatePartitioner} from the files
 * written by {@code writeData} equals a partitioner assembled by hand.
 */
@Test
public void calculatePartitionerTest(@TempDir java.nio.file.Path tempDir) throws IOException {
    // Given
    final FileSystem fileSystem = FileSystem.get(new Configuration());
    final Schema gafferSchema = getSchema();
    final SchemaUtils utils = new SchemaUtils(gafferSchema);
    final String rootDir = tempDir.toString();
    writeData(rootDir, utils);

    // When
    // - Derive the partitioner from the files on disk
    final CalculatePartitioner calculator = new CalculatePartitioner(new Path(rootDir), gafferSchema, fileSystem);
    final GraphPartitioner actual = calculator.call();

    // - Build the split points for entities, edges and reversed edges in a single pass
    final List<PartitionKey> entityKeys = new ArrayList<>();
    final List<PartitionKey> edgeKeys = new ArrayList<>();
    final List<PartitionKey> reversedEdgeKeys = new ArrayList<>();
    for (int step = 1; step < 10; step++) {
        final long first = 10L * step;
        final long second = first + 1;
        entityKeys.add(new PartitionKey(new Object[] { first }));
        edgeKeys.add(new PartitionKey(new Object[] { first, second, true }));
        // Reversed edges carry the same two values in swapped order
        reversedEdgeKeys.add(new PartitionKey(new Object[] { second, first, true }));
    }

    // - Assemble the partitioner we expect to get back
    final GraphPartitioner expected = new GraphPartitioner();
    expected.addGroupPartitioner(TestGroups.ENTITY, new GroupPartitioner(TestGroups.ENTITY, entityKeys));
    expected.addGroupPartitioner(TestGroups.ENTITY_2, new GroupPartitioner(TestGroups.ENTITY_2, entityKeys));
    expected.addGroupPartitioner(TestGroups.EDGE, new GroupPartitioner(TestGroups.EDGE, edgeKeys));
    expected.addGroupPartitioner(TestGroups.EDGE_2, new GroupPartitioner(TestGroups.EDGE_2, edgeKeys));
    expected.addGroupPartitionerForReversedEdges(TestGroups.EDGE, new GroupPartitioner(TestGroups.EDGE, reversedEdgeKeys));
    expected.addGroupPartitionerForReversedEdges(TestGroups.EDGE_2, new GroupPartitioner(TestGroups.EDGE_2, reversedEdgeKeys));

    // Then
    assertEquals(expected, actual);
}

Also used : Path(org.apache.hadoop.fs.Path) GroupPartitioner(uk.gov.gchq.gaffer.parquetstore.partitioner.GroupPartitioner) Configuration(org.apache.hadoop.conf.Configuration) Schema(uk.gov.gchq.gaffer.store.schema.Schema) ArrayList(java.util.ArrayList) SchemaUtils(uk.gov.gchq.gaffer.parquetstore.utils.SchemaUtils) GraphPartitioner(uk.gov.gchq.gaffer.parquetstore.partitioner.GraphPartitioner) FileSystem(org.apache.hadoop.fs.FileSystem) PartitionKey(uk.gov.gchq.gaffer.parquetstore.partitioner.PartitionKey) Test(org.junit.jupiter.api.Test)

Example 2 with GroupPartitioner

Use of uk.gov.gchq.gaffer.parquetstore.partitioner.GroupPartitioner in the Gaffer project by gchq.

From the class WriteUnsortedDataTest, method testNoSplitPointsCase.

/**
 * Writes elements through {@code WriteUnsortedData} using partitioners that have no split points and
 * checks that each group (and each reversed edge group) ends up in a single {@code split-0} directory
 * holding the expected data.
 */
@Test
public void testNoSplitPointsCase(@TempDir java.nio.file.Path tempDir) throws IOException, OperationException {
    // Given
    final String outputDir = tempDir.toAbsolutePath().toString();
    final SchemaUtils schemaUtils = new SchemaUtils(TestUtils.gafferSchema("schemaUsingLongVertexType"));

    // Every group gets a partitioner with an empty list of split points
    final GraphPartitioner partitioner = new GraphPartitioner();
    for (final String group : new String[] { TestGroups.ENTITY, TestGroups.ENTITY_2, TestGroups.EDGE, TestGroups.EDGE_2 }) {
        partitioner.addGroupPartitioner(group, new GroupPartitioner(group, new ArrayList<>()));
    }
    for (final String edgeGroup : new String[] { TestGroups.EDGE, TestGroups.EDGE_2 }) {
        partitioner.addGroupPartitionerForReversedEdges(edgeGroup, new GroupPartitioner(edgeGroup, new ArrayList<>()));
    }

    final List<Element> elements = getData(3L);
    final BiFunction<String, Integer, String> groupFileName =
            (group, partitionId) -> outputDir + "/GROUP=" + group + "/split-" + partitionId;
    final BiFunction<String, Integer, String> reversedGroupFileName =
            (group, partitionId) -> outputDir + "/REVERSED-GROUP=" + group + "/split-" + partitionId;
    final WriteUnsortedData writer = new WriteUnsortedData(outputDir, CompressionCodecName.GZIP, schemaUtils,
            partitioner, groupFileName, reversedGroupFileName);

    // When
    writer.writeElements(elements);

    // Then
    final String entityDir = outputDir + "/GROUP=" + TestGroups.ENTITY + "/split-0";
    final String entity2Dir = outputDir + "/GROUP=" + TestGroups.ENTITY_2 + "/split-0";
    final String edgeDir = outputDir + "/GROUP=" + TestGroups.EDGE + "/split-0";
    final String edge2Dir = outputDir + "/GROUP=" + TestGroups.EDGE_2 + "/split-0";
    final String reversedEdgeDir = outputDir + "/REVERSED-GROUP=" + TestGroups.EDGE + "/split-0";
    final String reversedEdge2Dir = outputDir + "/REVERSED-GROUP=" + TestGroups.EDGE_2 + "/split-0";

    // - Each directory should exist and contain one file
    testExistsAndContainsNFiles(entityDir, 1);
    testExistsAndContainsNFiles(entity2Dir, 1);
    testExistsAndContainsNFiles(edgeDir, 1);
    testExistsAndContainsNFiles(edge2Dir, 1);
    testExistsAndContainsNFiles(reversedEdgeDir, 1);
    testExistsAndContainsNFiles(reversedEdge2Dir, 1);

    // - Each file should contain the data that was written to it, in the order it was in the iterable
    testContainsCorrectDataNoSplitPoints(TestGroups.ENTITY, entityDir, elements, schemaUtils);
    testContainsCorrectDataNoSplitPoints(TestGroups.ENTITY_2, entity2Dir, elements, schemaUtils);
    testContainsCorrectDataNoSplitPoints(TestGroups.EDGE, edgeDir, elements, schemaUtils);
    testContainsCorrectDataNoSplitPoints(TestGroups.EDGE_2, edge2Dir, elements, schemaUtils);
    testContainsCorrectDataNoSplitPoints(TestGroups.EDGE, reversedEdgeDir, elements, schemaUtils);

    // - For the reversed EDGE_2 group, compare against only those EDGE_2 edges whose source differs from their destination
    final List<Element> elementsWithSameSrcDstRemoved = new ArrayList<>();
    for (final Element element : elements) {
        if (element.getGroup().equals(TestGroups.EDGE_2)) {
            final Edge edge = (Edge) element;
            if (!edge.getSource().equals(edge.getDestination())) {
                elementsWithSameSrcDstRemoved.add(edge);
            }
        }
    }
    testContainsCorrectDataNoSplitPoints(TestGroups.EDGE_2, reversedEdge2Dir, elementsWithSameSrcDstRemoved, schemaUtils);
}

Also used : GroupPartitioner(uk.gov.gchq.gaffer.parquetstore.partitioner.GroupPartitioner) Assertions.fail(org.junit.jupiter.api.Assertions.fail) Arrays(java.util.Arrays) FileSystem(org.apache.hadoop.fs.FileSystem) Date(java.util.Date) BiFunction(java.util.function.BiFunction) SerialisationException(uk.gov.gchq.gaffer.exception.SerialisationException) SimpleDateFormat(java.text.SimpleDateFormat) GroupPartitioner(uk.gov.gchq.gaffer.parquetstore.partitioner.GroupPartitioner) FileStatus(org.apache.hadoop.fs.FileStatus) Element(uk.gov.gchq.gaffer.data.element.Element) ArrayList(java.util.ArrayList) HashSet(java.util.HashSet) PartitionKey(uk.gov.gchq.gaffer.parquetstore.partitioner.PartitionKey) FreqMap(uk.gov.gchq.gaffer.types.FreqMap) BeforeAll(org.junit.jupiter.api.BeforeAll) Configuration(org.apache.hadoop.conf.Configuration) Map(java.util.Map) Path(org.apache.hadoop.fs.Path) Edge(uk.gov.gchq.gaffer.data.element.Edge) Assertions.assertEquals(org.junit.jupiter.api.Assertions.assertEquals) ParseException(java.text.ParseException) TestGroups(uk.gov.gchq.gaffer.commonutil.TestGroups) ParquetElementReader(uk.gov.gchq.gaffer.parquetstore.io.reader.ParquetElementReader) TimeZone(java.util.TimeZone) ParquetReader(org.apache.parquet.hadoop.ParquetReader) IOException(java.io.IOException) Entity(uk.gov.gchq.gaffer.data.element.Entity) Collectors(java.util.stream.Collectors) Test(org.junit.jupiter.api.Test) DataGen(uk.gov.gchq.gaffer.parquetstore.testutils.DataGen) List(java.util.List) WriteUnsortedData(uk.gov.gchq.gaffer.parquetstore.operation.handler.utilities.WriteUnsortedData) TempDir(org.junit.jupiter.api.io.TempDir) TestUtils(uk.gov.gchq.gaffer.parquetstore.testutils.TestUtils) Assertions.assertTrue(org.junit.jupiter.api.Assertions.assertTrue) GraphPartitioner(uk.gov.gchq.gaffer.parquetstore.partitioner.GraphPartitioner) CompressionCodecName(org.apache.parquet.hadoop.metadata.CompressionCodecName) OperationException(uk.gov.gchq.gaffer.operation.OperationException) 
WriteUnsortedData(uk.gov.gchq.gaffer.parquetstore.operation.handler.utilities.WriteUnsortedData) GraphPartitioner(uk.gov.gchq.gaffer.parquetstore.partitioner.GraphPartitioner) Element(uk.gov.gchq.gaffer.data.element.Element) ArrayList(java.util.ArrayList) Test(org.junit.jupiter.api.Test)

Example 3 with GroupPartitioner

Use of uk.gov.gchq.gaffer.parquetstore.partitioner.GroupPartitioner in the Gaffer project by gchq.

From the class GroupPartitionerSerialiserTest, method shouldSerialiseKeysToFileAndReadCorrectly.

/**
 * Round-trips a {@link GroupPartitioner} holding two split points of mixed value types through
 * {@link GroupPartitionerSerialiser} via a temporary file and checks the result equals the original.
 *
 * @param tempDir temporary directory supplied by JUnit in which the serialised file is created
 * @throws IOException if writing or reading the temporary file fails
 */
@Test
public void shouldSerialiseKeysToFileAndReadCorrectly(@TempDir Path tempDir) throws IOException {
    // Given
    final Object[] key1 = new Object[] { 1L, 5, "ABC", 10F, (short) 1, (byte) 64, new byte[] { (byte) 1, (byte) 2, (byte) 3 } };
    final PartitionKey partitionKey1 = new PartitionKey(key1);
    final Object[] key2 = new Object[] { 100L, 500, "XYZ", 1000F, (short) 3, (byte) 55, new byte[] { (byte) 10, (byte) 9, (byte) 8, (byte) 7 } };
    final PartitionKey partitionKey2 = new PartitionKey(key2);
    final List<PartitionKey> splitPoints = new ArrayList<>();
    splitPoints.add(partitionKey1);
    splitPoints.add(partitionKey2);
    final GroupPartitioner groupPartitioner = new GroupPartitioner("GROUP", splitPoints);
    final GroupPartitionerSerialiser serialiser = new GroupPartitionerSerialiser();
    // When
    final String filename = tempDir.resolve("test").toString();
    // try-with-resources guarantees the streams are closed even if write/read throws
    try (DataOutputStream dos = new DataOutputStream(new FileOutputStream(filename))) {
        serialiser.write(groupPartitioner, dos);
    }
    final GroupPartitioner readGroupPartitioner;
    try (DataInputStream dis = new DataInputStream(new FileInputStream(filename))) {
        readGroupPartitioner = serialiser.read(dis);
    }
    // Then
    assertEquals(groupPartitioner, readGroupPartitioner);
}

Also used : GroupPartitioner(uk.gov.gchq.gaffer.parquetstore.partitioner.GroupPartitioner) DataOutputStream(java.io.DataOutputStream) FileOutputStream(java.io.FileOutputStream) ArrayList(java.util.ArrayList) PartitionKey(uk.gov.gchq.gaffer.parquetstore.partitioner.PartitionKey) DataInputStream(java.io.DataInputStream) FileInputStream(java.io.FileInputStream) Test(org.junit.jupiter.api.Test)

Example 4 with GroupPartitioner

Use of uk.gov.gchq.gaffer.parquetstore.partitioner.GroupPartitioner in the Gaffer project by gchq.

From the class ParquetStore, method initialise.

/**
 * Prepares the store's data directory, either creating its layout from scratch or validating an
 * existing one.
 *
 * <p>If the data directory does not exist or is empty, the current time is taken as the snapshot id,
 * the snapshot directory is created along with one directory per schema group and one reversed-edge
 * directory per edge group, and a {@link GraphPartitioner} with no split points for any group is
 * written to the graph partitioner path.
 *
 * <p>Otherwise the data directory must contain at least one directory whose name starts with
 * {@code SNAPSHOT + "="}. The latest snapshot becomes the current one and every group directory
 * (including those for reversed edges) is required to exist.
 *
 * @throws IOException if a file system operation fails
 * @throws StoreException if a non-empty data directory has no snapshot directory, or a group
 *         directory is missing from the current snapshot
 */
private void initialise() throws IOException, StoreException {
    // If data directory is empty or does not exist then this is the first time the store has been created.
    final Path dataDirPath = new Path(getDataDir());
    if (!fs.exists(dataDirPath) || 0 == fs.listStatus(dataDirPath).length) {
        LOGGER.info("Data directory {} doesn't exist or is empty so initialising directory structure", dataDirPath);
        currentSnapshot = System.currentTimeMillis();
        LOGGER.info("Initialising snapshot id to {}", currentSnapshot);
        final Path snapshotPath = new Path(dataDirPath, getSnapshotPath(currentSnapshot));
        LOGGER.info("Creating snapshot directory {}", snapshotPath);
        fs.mkdirs(snapshotPath);
        LOGGER.info("Creating group directories under {}", snapshotPath);
        for (final String group : getSchema().getGroups()) {
            final Path groupDir = getGroupPath(group);
            fs.mkdirs(groupDir);
            LOGGER.info("Created directory {}", groupDir);
        }
        LOGGER.info("Creating group directories for reversed edges under {}", snapshotPath);
        for (final String group : getSchema().getEdgeGroups()) {
            final Path groupDir = getGroupPathForReversedEdges(group);
            fs.mkdirs(groupDir);
            LOGGER.info("Created directory {}", groupDir);
        }
        LOGGER.info("Creating GraphPartitioner with 0 split points for each group");
        graphPartitioner = new GraphPartitioner();
        for (final String group : getSchema().getGroups()) {
            graphPartitioner.addGroupPartitioner(group, new GroupPartitioner(group, new ArrayList<>()));
        }
        for (final String group : getSchema().getEdgeGroups()) {
            graphPartitioner.addGroupPartitionerForReversedEdges(group, new GroupPartitioner(group, new ArrayList<>()));
        }
        LOGGER.info("Writing GraphPartitioner to snapshot directory");
        final Path graphPartitionerPath = getGraphPartitionerPath();
        // try-with-resources ensures the stream is closed even if serialisation throws
        try (FSDataOutputStream dataOutputStream = fs.create(graphPartitionerPath)) {
            new GraphPartitionerSerialiser().write(graphPartitioner, dataOutputStream);
        }
        LOGGER.info("Wrote GraphPartitioner to file {}", graphPartitionerPath);
    } else {
        LOGGER.info("Data directory {} exists and is non-empty, validating a snapshot directory exists", dataDirPath);
        final FileStatus[] fileStatuses = fs.listStatus(dataDirPath, f -> f.getName().startsWith(SNAPSHOT + "="));
        // Only directories count as snapshots; a plain file with a matching name is ignored
        final boolean hasSnapshotDirectory = Arrays.stream(fileStatuses).anyMatch(FileStatus::isDirectory);
        if (!hasSnapshotDirectory) {
            LOGGER.error("Data directory {} should contain a snapshot directory", dataDirPath);
            throw new StoreException("Data directory should contain a snapshot directory");
        }
        this.currentSnapshot = getLatestSnapshot();
        LOGGER.info("Latest snapshot directory in data directory {} is {}", dataDirPath, this.currentSnapshot);
        LOGGER.info("Verifying snapshot directory contains the correct directories");
        for (final String group : getSchema().getGroups()) {
            final Path groupDir = getGroupPath(group);
            if (!fs.exists(groupDir)) {
                LOGGER.error("Directory {} should exist", groupDir);
                throw new StoreException("Group directory " + groupDir + " should exist in snapshot directory " + getSnapshotPath(this.currentSnapshot));
            }
        }
        for (final String group : getSchema().getEdgeGroups()) {
            final Path groupDir = getGroupPathForReversedEdges(group);
            if (!fs.exists(groupDir)) {
                LOGGER.error("Directory {} should exist", groupDir);
                throw new StoreException("Group directory " + groupDir + " should exist in snapshot directory " + getSnapshotPath(this.currentSnapshot));
            }
        }
    }
}

Also used : Path(org.apache.hadoop.fs.Path) GroupPartitioner(uk.gov.gchq.gaffer.parquetstore.partitioner.GroupPartitioner) GraphPartitionerSerialiser(uk.gov.gchq.gaffer.parquetstore.partitioner.serialisation.GraphPartitionerSerialiser) GetElementsHandler(uk.gov.gchq.gaffer.parquetstore.operation.handler.GetElementsHandler) InLineHyperLogLogPlusParquetSerialiser(uk.gov.gchq.gaffer.parquetstore.serialisation.impl.InLineHyperLogLogPlusParquetSerialiser) Arrays(java.util.Arrays) BooleanParquetSerialiser(uk.gov.gchq.gaffer.parquetstore.serialisation.impl.BooleanParquetSerialiser) DoubleParquetSerialiser(uk.gov.gchq.gaffer.parquetstore.serialisation.impl.DoubleParquetSerialiser) FileSystem(org.apache.hadoop.fs.FileSystem) GetAdjacentIdsHandler(uk.gov.gchq.gaffer.parquetstore.operation.handler.GetAdjacentIdsHandler) LoggerFactory(org.slf4j.LoggerFactory) ByteParquetSerialiser(uk.gov.gchq.gaffer.parquetstore.serialisation.impl.ByteParquetSerialiser) FileStatus(org.apache.hadoop.fs.FileStatus) Element(uk.gov.gchq.gaffer.data.element.Element) SchemaOptimiser(uk.gov.gchq.gaffer.store.schema.SchemaOptimiser) FloatParquetSerialiser(uk.gov.gchq.gaffer.parquetstore.serialisation.impl.FloatParquetSerialiser) CloseableIterable(uk.gov.gchq.gaffer.commonutil.iterable.CloseableIterable) Configuration(org.apache.hadoop.conf.Configuration) TreeSetStringParquetSerialiser(uk.gov.gchq.gaffer.parquetstore.serialisation.impl.TreeSetStringParquetSerialiser) Path(org.apache.hadoop.fs.Path) FSDataInputStream(org.apache.hadoop.fs.FSDataInputStream) SerialisationFactory(uk.gov.gchq.gaffer.store.SerialisationFactory) ValidationResult(uk.gov.gchq.koryphe.ValidationResult) Partition(uk.gov.gchq.gaffer.parquetstore.partitioner.Partition) ImportJavaRDDOfElements(uk.gov.gchq.gaffer.spark.operation.javardd.ImportJavaRDDOfElements) GetDataFrameOfElements(uk.gov.gchq.gaffer.spark.operation.dataframe.GetDataFrameOfElements) SchemaUtils(uk.gov.gchq.gaffer.parquetstore.utils.SchemaUtils) Set(java.util.Set) 
Collectors(java.util.stream.Collectors) Sets(com.google.common.collect.Sets) Store(uk.gov.gchq.gaffer.store.Store) PRE_AGGREGATION_FILTERING(uk.gov.gchq.gaffer.store.StoreTrait.PRE_AGGREGATION_FILTERING) List(java.util.List) INGEST_AGGREGATION(uk.gov.gchq.gaffer.store.StoreTrait.INGEST_AGGREGATION) Entry(java.util.Map.Entry) AddElements(uk.gov.gchq.gaffer.operation.impl.add.AddElements) StoreTrait(uk.gov.gchq.gaffer.store.StoreTrait) OutputOperationHandler(uk.gov.gchq.gaffer.store.operation.handler.OutputOperationHandler) SuppressFBWarnings(edu.umd.cs.findbugs.annotations.SuppressFBWarnings) SchemaElementDefinition(uk.gov.gchq.gaffer.store.schema.SchemaElementDefinition) StoreException(uk.gov.gchq.gaffer.store.StoreException) Serialiser(uk.gov.gchq.gaffer.serialisation.Serialiser) ORDERED(uk.gov.gchq.gaffer.store.StoreTrait.ORDERED) GetElements(uk.gov.gchq.gaffer.operation.impl.get.GetElements) TypeSubTypeValueParquetSerialiser(uk.gov.gchq.gaffer.parquetstore.serialisation.impl.TypeSubTypeValueParquetSerialiser) ImportJavaRDDOfElementsHandler(uk.gov.gchq.gaffer.parquetstore.operation.handler.spark.ImportJavaRDDOfElementsHandler) GroupPartitioner(uk.gov.gchq.gaffer.parquetstore.partitioner.GroupPartitioner) ArrayList(java.util.ArrayList) CalculatePartitioner(uk.gov.gchq.gaffer.parquetstore.operation.handler.utilities.CalculatePartitioner) TypeValueParquetSerialiser(uk.gov.gchq.gaffer.parquetstore.serialisation.impl.TypeValueParquetSerialiser) FSDataOutputStream(org.apache.hadoop.fs.FSDataOutputStream) IntegerParquetSerialiser(uk.gov.gchq.gaffer.parquetstore.serialisation.impl.IntegerParquetSerialiser) JavaSerialiser(uk.gov.gchq.gaffer.serialisation.implementation.JavaSerialiser) LongParquetSerialiser(uk.gov.gchq.gaffer.parquetstore.serialisation.impl.LongParquetSerialiser) ImportRDDOfElementsHandler(uk.gov.gchq.gaffer.parquetstore.operation.handler.spark.ImportRDDOfElementsHandler) Logger(org.slf4j.Logger) 
DateParquetSerialiser(uk.gov.gchq.gaffer.parquetstore.serialisation.impl.DateParquetSerialiser) StringParquetSerialiser(uk.gov.gchq.gaffer.parquetstore.serialisation.impl.StringParquetSerialiser) IOException(java.io.IOException) GetAllElementsHandler(uk.gov.gchq.gaffer.parquetstore.operation.handler.GetAllElementsHandler) StoreProperties(uk.gov.gchq.gaffer.store.StoreProperties) FreqMapParquetSerialiser(uk.gov.gchq.gaffer.parquetstore.serialisation.impl.FreqMapParquetSerialiser) IdentifierType(uk.gov.gchq.gaffer.data.element.IdentifierType) GraphPartitionerSerialiser(uk.gov.gchq.gaffer.parquetstore.partitioner.serialisation.GraphPartitionerSerialiser) AddElementsHandler(uk.gov.gchq.gaffer.parquetstore.operation.handler.AddElementsHandler) Schema(uk.gov.gchq.gaffer.store.schema.Schema) GetDataFrameOfElementsHandler(uk.gov.gchq.gaffer.parquetstore.operation.handler.spark.GetDataFrameOfElementsHandler) GraphPartitioner(uk.gov.gchq.gaffer.parquetstore.partitioner.GraphPartitioner) ImportRDDOfElements(uk.gov.gchq.gaffer.spark.operation.scalardd.ImportRDDOfElements) HashSetStringParquetSerialiser(uk.gov.gchq.gaffer.parquetstore.serialisation.impl.HashSetStringParquetSerialiser) OperationHandler(uk.gov.gchq.gaffer.store.operation.handler.OperationHandler) ArrayListStringParquetSerialiser(uk.gov.gchq.gaffer.parquetstore.serialisation.impl.ArrayListStringParquetSerialiser) ShortParquetSerialiser(uk.gov.gchq.gaffer.parquetstore.serialisation.impl.ShortParquetSerialiser) Collections(java.util.Collections) GraphPartitioner(uk.gov.gchq.gaffer.parquetstore.partitioner.GraphPartitioner) FileStatus(org.apache.hadoop.fs.FileStatus) ArrayList(java.util.ArrayList) FSDataOutputStream(org.apache.hadoop.fs.FSDataOutputStream) StoreException(uk.gov.gchq.gaffer.store.StoreException)

Example 5 with GroupPartitioner

Use of uk.gov.gchq.gaffer.parquetstore.partitioner.GroupPartitioner in the Gaffer project by gchq.

From the class GroupPartitionerSerialiserTest, method testWithInfinitePartitionKeys.

/**
 * Round-trips a {@link GroupPartitioner} that was created with an empty list of split points through
 * {@link GroupPartitionerSerialiser} via a temporary file and checks the result equals the original.
 *
 * @param tempDir temporary directory supplied by JUnit in which the serialised file is created
 * @throws IOException if writing or reading the temporary file fails
 */
@Test
public void testWithInfinitePartitionKeys(@TempDir Path tempDir) throws IOException {
    // Given
    final GroupPartitioner groupPartitioner = new GroupPartitioner("GROUP", new ArrayList<>());
    final GroupPartitionerSerialiser serialiser = new GroupPartitionerSerialiser();
    // When
    final String filename = tempDir.resolve("test").toString();
    // try-with-resources guarantees the streams are closed even if write/read throws
    try (DataOutputStream dos = new DataOutputStream(new FileOutputStream(filename))) {
        serialiser.write(groupPartitioner, dos);
    }
    final GroupPartitioner readGroupPartitioner;
    try (DataInputStream dis = new DataInputStream(new FileInputStream(filename))) {
        readGroupPartitioner = serialiser.read(dis);
    }
    // Then
    // Expected value first, so a failure message labels the two sides correctly
    assertEquals(groupPartitioner, readGroupPartitioner);
}

Also used : GroupPartitioner(uk.gov.gchq.gaffer.parquetstore.partitioner.GroupPartitioner) DataOutputStream(java.io.DataOutputStream) FileOutputStream(java.io.FileOutputStream) DataInputStream(java.io.DataInputStream) FileInputStream(java.io.FileInputStream) Test(org.junit.jupiter.api.Test)

Aggregations

GroupPartitioner (uk.gov.gchq.gaffer.parquetstore.partitioner.GroupPartitioner)9 ArrayList (java.util.ArrayList)8 Test (org.junit.jupiter.api.Test)7 GraphPartitioner (uk.gov.gchq.gaffer.parquetstore.partitioner.GraphPartitioner)7 PartitionKey (uk.gov.gchq.gaffer.parquetstore.partitioner.PartitionKey)7 FileSystem (org.apache.hadoop.fs.FileSystem)6 Path (org.apache.hadoop.fs.Path)6 IOException (java.io.IOException)5 Arrays (java.util.Arrays)5 List (java.util.List)5 Configuration (org.apache.hadoop.conf.Configuration)5 FileStatus (org.apache.hadoop.fs.FileStatus)5 Element (uk.gov.gchq.gaffer.data.element.Element)5 Collectors (java.util.stream.Collectors)4 ParquetReader (org.apache.parquet.hadoop.ParquetReader)4 DataInputStream (java.io.DataInputStream)3 DataOutputStream (java.io.DataOutputStream)3 FileInputStream (java.io.FileInputStream)3 FileOutputStream (java.io.FileOutputStream)3 ParseException (java.text.ParseException)3