Use of uk.gov.gchq.gaffer.parquetstore.partitioner.GraphPartitioner in project Gaffer (by gchq):
class QueryGenerator, method getPathsForSeed.
/**
 * Determines the set of Parquet file paths that could contain elements matching the
 * given seed within the given group.
 *
 * <p>For an entity seed the vertex itself is the partition key. For an edge seed
 * queried against an edge group, the partition key is the concatenation of the
 * source and destination objects; against an entity group, both endpoints are used
 * as separate vertex seeds.
 *
 * @param parquetElementSeed the seed (entity or edge) to locate
 * @param group              the group whose files should be searched
 * @return the set of paths of files that may contain matching elements
 */
private Set<PathInfo> getPathsForSeed(final ParquetElementSeed parquetElementSeed, final String group) {
    final GraphPartitioner graphPartitioner = store.getGraphPartitioner();
    final boolean isEntityGroup = store.getSchema().getEntityGroups().contains(group);
    final List<Object[]> seeds = new ArrayList<>();
    if (parquetElementSeed instanceof ParquetEntitySeed) {
        seeds.add(((ParquetEntitySeed) parquetElementSeed).getSeed());
    } else {
        final ParquetEdgeSeed edgeSeed = (ParquetEdgeSeed) parquetElementSeed;
        if (!isEntityGroup) {
            // Edge group: the partition key is source followed by destination.
            final Object[] source = edgeSeed.getSource();
            final Object[] destination = edgeSeed.getDestination();
            final Object[] seed = new Object[source.length + destination.length];
            System.arraycopy(source, 0, seed, 0, source.length);
            System.arraycopy(destination, 0, seed, source.length, destination.length);
            seeds.add(seed);
        } else {
            // Entity group: treat each endpoint of the edge seed as a vertex seed.
            seeds.add(edgeSeed.getSource());
            seeds.add(edgeSeed.getDestination());
        }
    }
    // The file type depends only on the group, not on the individual seed.
    final PathInfo.FILETYPE fileType = isEntityGroup ? PathInfo.FILETYPE.ENTITY : PathInfo.FILETYPE.EDGE;
    final List<PathInfo> paths = new ArrayList<>();
    for (final Object[] seed : seeds) {
        final List<Integer> partitionIds = graphPartitioner.getGroupPartitioner(group).getPartitionIds(seed);
        LOGGER.debug("Partition ids for seed {} in group {}: {}", seed, group, partitionIds);
        partitionIds.forEach(id -> paths.add(new PathInfo(new Path(store.getFile(group, id)), group, fileType)));
        if (!isEntityGroup && parquetElementSeed instanceof ParquetEntitySeed) {
            // An entity seed against an edge group may match on the destination,
            // so the reversed-edge files must be searched as well.
            final List<Integer> partitionIdsFromReversed = graphPartitioner.getGroupPartitionerForReversedEdges(group).getPartitionIds(seed);
            partitionIdsFromReversed.forEach(id -> paths.add(new PathInfo(new Path(store.getFileForReversedEdges(group, id)), group, PathInfo.FILETYPE.REVERSED_EDGE)));
        }
    }
    LOGGER.debug("Returning {} paths for seed {} and group {} (paths are {})", paths.size(), parquetElementSeed, group, paths);
    return paths.stream().collect(Collectors.toSet());
}
Use of uk.gov.gchq.gaffer.parquetstore.partitioner.GraphPartitioner in project Gaffer (by gchq):
class WriteUnsortedDataTest, method testMultipleSplitPointsCase.
@Test
public void testMultipleSplitPointsCase(@TempDir java.nio.file.Path tempDir) throws IOException, OperationException {
    // Given
    final String tempFilesDir = tempDir.toAbsolutePath().toString();
    final SchemaUtils schemaUtils = new SchemaUtils(TestUtils.gafferSchema("schemaUsingLongVertexType"));
    final GraphPartitioner graphPartitioner = new GraphPartitioner();
    final List<Element> elements = new ArrayList<>();
    // TestGroups.ENTITY, split points are 10L and 100L. Create data with
    // VERTEX
    // 5L
    // 10L
    // 10L
    // 11L
    // 12L
    // 100L
    // 100L
    // 200L
    final List<PartitionKey> splitPointsEntity = new ArrayList<>();
    splitPointsEntity.add(new PartitionKey(new Object[] { 10L }));
    splitPointsEntity.add(new PartitionKey(new Object[] { 100L }));
    graphPartitioner.addGroupPartitioner(TestGroups.ENTITY, new GroupPartitioner(TestGroups.ENTITY, splitPointsEntity));
    elements.add(createEntityForEntityGroup(5L));
    elements.add(createEntityForEntityGroup(10L));
    elements.add(createEntityForEntityGroup(10L));
    elements.add(createEntityForEntityGroup(11L));
    elements.add(createEntityForEntityGroup(12L));
    elements.add(createEntityForEntityGroup(100L));
    elements.add(createEntityForEntityGroup(100L));
    elements.add(createEntityForEntityGroup(200L));
    // TestGroups.ENTITY_2, split points are 100L and 1000L. Create data with
    // VERTEX
    // 5L
    // 100L
    // 200L
    // 1000L
    // 5000L
    final List<PartitionKey> splitPointsEntity_2 = new ArrayList<>();
    splitPointsEntity_2.add(new PartitionKey(new Object[] { 100L }));
    splitPointsEntity_2.add(new PartitionKey(new Object[] { 1000L }));
    graphPartitioner.addGroupPartitioner(TestGroups.ENTITY_2, new GroupPartitioner(TestGroups.ENTITY_2, splitPointsEntity_2));
    elements.add(createEntityForEntityGroup_2(5L));
    elements.add(createEntityForEntityGroup_2(100L));
    elements.add(createEntityForEntityGroup_2(200L));
    elements.add(createEntityForEntityGroup_2(1000L));
    elements.add(createEntityForEntityGroup_2(5000L));
    // TestGroups.EDGE, split points are [1000L, 200L, true] and [1000L, 30000L, false]. Create data with
    // SOURCE DESTINATION DIRECTED
    // 5L 5000L true
    // 5L 200L false
    // 1000L 90L true
    // 1000L 10000L false
    // 1000L 30000L false
    // 1000L 300000L true
    // 10000L 400L false
    final List<PartitionKey> splitPointsEdge = new ArrayList<>();
    splitPointsEdge.add(new PartitionKey(new Object[] { 1000L, 200L, true }));
    splitPointsEdge.add(new PartitionKey(new Object[] { 1000L, 30000L, false }));
    graphPartitioner.addGroupPartitioner(TestGroups.EDGE, new GroupPartitioner(TestGroups.EDGE, splitPointsEdge));
    final List<PartitionKey> splitPointsReversedEdge = new ArrayList<>();
    splitPointsReversedEdge.add(new PartitionKey(new Object[] { 100L, 1000L, true }));
    splitPointsReversedEdge.add(new PartitionKey(new Object[] { 300L, 2000L, false }));
    graphPartitioner.addGroupPartitionerForReversedEdges(TestGroups.EDGE, new GroupPartitioner(TestGroups.EDGE, splitPointsReversedEdge));
    elements.add(createEdgeForEdgeGroup(5L, 5000L, true));
    elements.add(createEdgeForEdgeGroup(5L, 200L, false));
    elements.add(createEdgeForEdgeGroup(1000L, 90L, true));
    elements.add(createEdgeForEdgeGroup(1000L, 10000L, false));
    elements.add(createEdgeForEdgeGroup(1000L, 30000L, false));
    elements.add(createEdgeForEdgeGroup(1000L, 300000L, true));
    elements.add(createEdgeForEdgeGroup(10000L, 400L, false));
    // TestGroups.EDGE_2, split points are [10L, 2000L, true] and [100L, 1000L, false]. Create data with
    // SOURCE DESTINATION DIRECTED
    // 5L 5000L true
    // 10L 2000L false
    // 10L 2000L true
    // 10L 3000L false
    // 100L 1000L false
    // 100L 3000L false
    // 100L 3000L true
    final List<PartitionKey> splitPointsEdge_2 = new ArrayList<>();
    splitPointsEdge_2.add(new PartitionKey(new Object[] { 10L, 2000L, true }));
    splitPointsEdge_2.add(new PartitionKey(new Object[] { 100L, 1000L, false }));
    graphPartitioner.addGroupPartitioner(TestGroups.EDGE_2, new GroupPartitioner(TestGroups.EDGE_2, splitPointsEdge_2));
    final List<PartitionKey> splitPointsReversedEdge_2 = new ArrayList<>();
    splitPointsReversedEdge_2.add(new PartitionKey(new Object[] { 1000L, 1500L, true }));
    splitPointsReversedEdge_2.add(new PartitionKey(new Object[] { 2000L, 2500L, false }));
    graphPartitioner.addGroupPartitionerForReversedEdges(TestGroups.EDGE_2, new GroupPartitioner(TestGroups.EDGE_2, splitPointsReversedEdge_2));
    elements.add(createEdgeForEdgeGroup_2(5L, 5000L, true));
    elements.add(createEdgeForEdgeGroup_2(10L, 2000L, false));
    elements.add(createEdgeForEdgeGroup_2(10L, 2000L, true));
    elements.add(createEdgeForEdgeGroup_2(10L, 3000L, false));
    elements.add(createEdgeForEdgeGroup_2(100L, 1000L, false));
    elements.add(createEdgeForEdgeGroup_2(100L, 3000L, false));
    elements.add(createEdgeForEdgeGroup_2(100L, 3000L, true));
    final BiFunction<String, Integer, String> fileNameForGroupAndPartitionId = (group, partitionId) -> tempFilesDir + "/GROUP=" + group + "/split-" + partitionId;
    final BiFunction<String, Integer, String> fileNameForGroupAndPartitionIdForReversedEdge = (group, partitionId) -> tempFilesDir + "/REVERSED-GROUP=" + group + "/split-" + partitionId;
    final WriteUnsortedData writeUnsortedData = new WriteUnsortedData(tempFilesDir, CompressionCodecName.GZIP, schemaUtils, graphPartitioner, fileNameForGroupAndPartitionId, fileNameForGroupAndPartitionIdForReversedEdge);
    // When
    writeUnsortedData.writeElements(elements);
    // Then
    // - For each group, directories split-0, split-1 and split-2 should exist and each contain one file
    testExistsAndContainsNFiles(tempFilesDir + "/GROUP=" + TestGroups.ENTITY + "/split-0", 1);
    testExistsAndContainsNFiles(tempFilesDir + "/GROUP=" + TestGroups.ENTITY + "/split-1", 1);
    testExistsAndContainsNFiles(tempFilesDir + "/GROUP=" + TestGroups.ENTITY + "/split-2", 1);
    testExistsAndContainsNFiles(tempFilesDir + "/GROUP=" + TestGroups.ENTITY_2 + "/split-0", 1);
    testExistsAndContainsNFiles(tempFilesDir + "/GROUP=" + TestGroups.ENTITY_2 + "/split-1", 1);
    testExistsAndContainsNFiles(tempFilesDir + "/GROUP=" + TestGroups.ENTITY_2 + "/split-2", 1);
    testExistsAndContainsNFiles(tempFilesDir + "/GROUP=" + TestGroups.EDGE + "/split-0", 1);
    testExistsAndContainsNFiles(tempFilesDir + "/GROUP=" + TestGroups.EDGE + "/split-1", 1);
    testExistsAndContainsNFiles(tempFilesDir + "/GROUP=" + TestGroups.EDGE + "/split-2", 1);
    testExistsAndContainsNFiles(tempFilesDir + "/GROUP=" + TestGroups.EDGE_2 + "/split-0", 1);
    testExistsAndContainsNFiles(tempFilesDir + "/GROUP=" + TestGroups.EDGE_2 + "/split-1", 1);
    testExistsAndContainsNFiles(tempFilesDir + "/GROUP=" + TestGroups.EDGE_2 + "/split-2", 1);
    // - The reversed-edge directories should also exist and each contain one file
    testExistsAndContainsNFiles(tempFilesDir + "/REVERSED-GROUP=" + TestGroups.EDGE + "/split-0", 1);
    testExistsAndContainsNFiles(tempFilesDir + "/REVERSED-GROUP=" + TestGroups.EDGE + "/split-1", 1);
    testExistsAndContainsNFiles(tempFilesDir + "/REVERSED-GROUP=" + TestGroups.EDGE + "/split-2", 1);
    testExistsAndContainsNFiles(tempFilesDir + "/REVERSED-GROUP=" + TestGroups.EDGE_2 + "/split-0", 1);
    testExistsAndContainsNFiles(tempFilesDir + "/REVERSED-GROUP=" + TestGroups.EDGE_2 + "/split-1", 1);
    testExistsAndContainsNFiles(tempFilesDir + "/REVERSED-GROUP=" + TestGroups.EDGE_2 + "/split-2", 1);
    // - Each split file should contain the data for that split in the order it was written
    for (final String group : new HashSet<>(Arrays.asList(TestGroups.ENTITY, TestGroups.ENTITY_2))) {
        testSplitFileContainsCorrectData(tempFilesDir + "/GROUP=" + group + "/split-0", group, true, false, null, graphPartitioner.getGroupPartitioner(group).getIthPartitionKey(0), elements, schemaUtils);
        testSplitFileContainsCorrectData(tempFilesDir + "/GROUP=" + group + "/split-1", group, true, false, graphPartitioner.getGroupPartitioner(group).getIthPartitionKey(0), graphPartitioner.getGroupPartitioner(group).getIthPartitionKey(1), elements, schemaUtils);
        testSplitFileContainsCorrectData(tempFilesDir + "/GROUP=" + group + "/split-2", group, true, false, graphPartitioner.getGroupPartitioner(group).getIthPartitionKey(1), null, elements, schemaUtils);
    }
    for (final String group : new HashSet<>(Arrays.asList(TestGroups.EDGE, TestGroups.EDGE_2))) {
        testSplitFileContainsCorrectData(tempFilesDir + "/GROUP=" + group + "/split-0", group, false, false, null, graphPartitioner.getGroupPartitioner(group).getIthPartitionKey(0), elements, schemaUtils);
        testSplitFileContainsCorrectData(tempFilesDir + "/REVERSED-GROUP=" + group + "/split-0", group, false, true, null, graphPartitioner.getGroupPartitionerForReversedEdges(group).getIthPartitionKey(0), elements, schemaUtils);
        testSplitFileContainsCorrectData(tempFilesDir + "/GROUP=" + group + "/split-1", group, false, false, graphPartitioner.getGroupPartitioner(group).getIthPartitionKey(0), graphPartitioner.getGroupPartitioner(group).getIthPartitionKey(1), elements, schemaUtils);
        testSplitFileContainsCorrectData(tempFilesDir + "/REVERSED-GROUP=" + group + "/split-1", group, false, true, graphPartitioner.getGroupPartitionerForReversedEdges(group).getIthPartitionKey(0), graphPartitioner.getGroupPartitionerForReversedEdges(group).getIthPartitionKey(1), elements, schemaUtils);
        testSplitFileContainsCorrectData(tempFilesDir + "/GROUP=" + group + "/split-2", group, false, false, graphPartitioner.getGroupPartitioner(group).getIthPartitionKey(1), null, elements, schemaUtils);
        testSplitFileContainsCorrectData(tempFilesDir + "/REVERSED-GROUP=" + group + "/split-2", group, false, true, graphPartitioner.getGroupPartitionerForReversedEdges(group).getIthPartitionKey(1), null, elements, schemaUtils);
    }
}
Use of uk.gov.gchq.gaffer.parquetstore.partitioner.GraphPartitioner in project Gaffer (by gchq):
class WriteUnsortedDataTest, method testOneSplitPointCase.
@Test
public void testOneSplitPointCase(@TempDir java.nio.file.Path tempDir) throws IOException, OperationException {
// Given
final String tempFilesDir = tempDir.toAbsolutePath().toString();
final SchemaUtils schemaUtils = new SchemaUtils(TestUtils.gafferSchema("schemaUsingLongVertexType"));
final GraphPartitioner graphPartitioner = new GraphPartitioner();
final List<Element> elements = new ArrayList<>();
// TestGroups.ENTITY, split point is 10L. Create data with
// VERTEX
// 5L
// 10L
// 10L
// 10L
// 20L
final List<PartitionKey> splitPointsEntity = new ArrayList<>();
splitPointsEntity.add(new PartitionKey(new Object[] { 10L }));
graphPartitioner.addGroupPartitioner(TestGroups.ENTITY, new GroupPartitioner(TestGroups.ENTITY, splitPointsEntity));
elements.add(createEntityForEntityGroup(5L));
elements.add(createEntityForEntityGroup(10L));
elements.add(createEntityForEntityGroup(10L));
elements.add(createEntityForEntityGroup(10L));
elements.add(createEntityForEntityGroup(20L));
// TestGroups.ENTITY_2, split point is 100L. Create data with
// VERTEX
// 5L
// 100L
// 1000L
final List<PartitionKey> splitPointsEntity_2 = new ArrayList<>();
splitPointsEntity_2.add(new PartitionKey(new Object[] { 100L }));
graphPartitioner.addGroupPartitioner(TestGroups.ENTITY_2, new GroupPartitioner(TestGroups.ENTITY_2, splitPointsEntity_2));
elements.add(createEntityForEntityGroup_2(5L));
elements.add(createEntityForEntityGroup_2(100L));
elements.add(createEntityForEntityGroup_2(1000L));
// TestGroups.EDGE, split point is [1000L, 200L, true]. Create data with
// SOURCE DESTINATION DIRECTED
// 5L 5000L true
// 5L 200L false
// 1000L 100L true
// 1000L 200L false
// 1000L 200L true
// 1000L 300L true
// 10000L 400L false
// 10000L 400L true
final List<PartitionKey> splitPointsEdge = new ArrayList<>();
splitPointsEdge.add(new PartitionKey(new Object[] { 1000L, 200L, true }));
graphPartitioner.addGroupPartitioner(TestGroups.EDGE, new GroupPartitioner(TestGroups.EDGE, splitPointsEdge));
final List<PartitionKey> splitPointsReversedEdge = new ArrayList<>();
splitPointsReversedEdge.add(new PartitionKey(new Object[] { 1000L, 300L, true }));
graphPartitioner.addGroupPartitionerForReversedEdges(TestGroups.EDGE, new GroupPartitioner(TestGroups.EDGE, splitPointsReversedEdge));
elements.add(createEdgeForEdgeGroup(5L, 5000L, true));
elements.add(createEdgeForEdgeGroup(5L, 200L, false));
elements.add(createEdgeForEdgeGroup(1000L, 100L, true));
elements.add(createEdgeForEdgeGroup(1000L, 200L, false));
elements.add(createEdgeForEdgeGroup(1000L, 200L, true));
elements.add(createEdgeForEdgeGroup(1000L, 300L, true));
elements.add(createEdgeForEdgeGroup(10000L, 400L, false));
elements.add(createEdgeForEdgeGroup(10000L, 400L, true));
// TestGroups.EDGE_2, split point is [10L, 2000L, true]. Create data with
// (NB: same source/destination/directed values as TestGroups.EDGE above)
// SOURCE DESTINATION DIRECTED
// 5L 5000L true
// 5L 200L false
// 1000L 100L true
// 1000L 200L false
// 1000L 200L true
// 1000L 300L true
// 10000L 400L false
// 10000L 400L true
final List<PartitionKey> splitPointsEdge_2 = new ArrayList<>();
splitPointsEdge_2.add(new PartitionKey(new Object[] { 10L, 2000L, true }));
graphPartitioner.addGroupPartitioner(TestGroups.EDGE_2, new GroupPartitioner(TestGroups.EDGE_2, splitPointsEdge_2));
final List<PartitionKey> splitPointsReversedEdge_2 = new ArrayList<>();
splitPointsReversedEdge_2.add(new PartitionKey(new Object[] { 3000L, 20L, true }));
graphPartitioner.addGroupPartitionerForReversedEdges(TestGroups.EDGE_2, new GroupPartitioner(TestGroups.EDGE_2, splitPointsReversedEdge_2));
elements.add(createEdgeForEdgeGroup_2(5L, 5000L, true));
elements.add(createEdgeForEdgeGroup_2(5L, 200L, false));
elements.add(createEdgeForEdgeGroup_2(1000L, 100L, true));
elements.add(createEdgeForEdgeGroup_2(1000L, 200L, false));
elements.add(createEdgeForEdgeGroup_2(1000L, 200L, true));
elements.add(createEdgeForEdgeGroup_2(1000L, 300L, true));
elements.add(createEdgeForEdgeGroup_2(10000L, 400L, false));
elements.add(createEdgeForEdgeGroup_2(10000L, 400L, true));
final BiFunction<String, Integer, String> fileNameForGroupAndPartitionId = (group, partitionId) -> tempFilesDir + "/GROUP=" + group + "/split-" + partitionId;
final BiFunction<String, Integer, String> fileNameForGroupAndPartitionIdForReversedEdge = (group, partitionId) -> tempFilesDir + "/REVERSED-GROUP=" + group + "/split-" + partitionId;
final WriteUnsortedData writeUnsortedData = new WriteUnsortedData(tempFilesDir, CompressionCodecName.GZIP, schemaUtils, graphPartitioner, fileNameForGroupAndPartitionId, fileNameForGroupAndPartitionIdForReversedEdge);
// When
writeUnsortedData.writeElements(elements);
// Then
// - For each group, directories split-0 and split-1 should exist and each contain one file
testExistsAndContainsNFiles(tempFilesDir + "/GROUP=" + TestGroups.ENTITY + "/split-0", 1);
testExistsAndContainsNFiles(tempFilesDir + "/GROUP=" + TestGroups.ENTITY + "/split-1", 1);
testExistsAndContainsNFiles(tempFilesDir + "/GROUP=" + TestGroups.ENTITY_2 + "/split-0", 1);
testExistsAndContainsNFiles(tempFilesDir + "/GROUP=" + TestGroups.ENTITY_2 + "/split-1", 1);
testExistsAndContainsNFiles(tempFilesDir + "/GROUP=" + TestGroups.EDGE + "/split-0", 1);
testExistsAndContainsNFiles(tempFilesDir + "/GROUP=" + TestGroups.EDGE + "/split-1", 1);
testExistsAndContainsNFiles(tempFilesDir + "/GROUP=" + TestGroups.EDGE_2 + "/split-0", 1);
testExistsAndContainsNFiles(tempFilesDir + "/GROUP=" + TestGroups.EDGE_2 + "/split-1", 1);
testExistsAndContainsNFiles(tempFilesDir + "/REVERSED-GROUP=" + TestGroups.EDGE + "/split-0", 1);
testExistsAndContainsNFiles(tempFilesDir + "/REVERSED-GROUP=" + TestGroups.EDGE + "/split-1", 1);
testExistsAndContainsNFiles(tempFilesDir + "/REVERSED-GROUP=" + TestGroups.EDGE_2 + "/split-0", 1);
testExistsAndContainsNFiles(tempFilesDir + "/REVERSED-GROUP=" + TestGroups.EDGE_2 + "/split-1", 1);
// - Each split file should contain the data for that split in the order it was written
for (final String group : new HashSet<>(Arrays.asList(TestGroups.ENTITY, TestGroups.ENTITY_2))) {
testSplitFileContainsCorrectData(tempFilesDir + "/GROUP=" + group + "/split-0", group, true, false, null, graphPartitioner.getGroupPartitioner(group).getIthPartitionKey(0), elements, schemaUtils);
testSplitFileContainsCorrectData(tempFilesDir + "/GROUP=" + group + "/split-1", group, true, false, graphPartitioner.getGroupPartitioner(group).getIthPartitionKey(0), null, elements, schemaUtils);
}
for (final String group : new HashSet<>(Arrays.asList(TestGroups.EDGE, TestGroups.EDGE_2))) {
testSplitFileContainsCorrectData(tempFilesDir + "/GROUP=" + group + "/split-0", group, false, false, null, graphPartitioner.getGroupPartitioner(group).getIthPartitionKey(0), elements, schemaUtils);
testSplitFileContainsCorrectData(tempFilesDir + "/REVERSED-GROUP=" + group + "/split-0", group, false, true, null, graphPartitioner.getGroupPartitionerForReversedEdges(group).getIthPartitionKey(0), elements, schemaUtils);
testSplitFileContainsCorrectData(tempFilesDir + "/GROUP=" + group + "/split-1", group, false, false, graphPartitioner.getGroupPartitioner(group).getIthPartitionKey(0), null, elements, schemaUtils);
testSplitFileContainsCorrectData(tempFilesDir + "/REVERSED-GROUP=" + group + "/split-1", group, false, true, graphPartitioner.getGroupPartitionerForReversedEdges(group).getIthPartitionKey(0), null, elements, schemaUtils);
}
}
Use of uk.gov.gchq.gaffer.parquetstore.partitioner.GraphPartitioner in project Gaffer (by gchq):
class GraphPartitionerSerialiserTest, method shouldGroupMultiplePartitionKeysAndSerialiseCorrectly.
@Test
public void shouldGroupMultiplePartitionKeysAndSerialiseCorrectly(@TempDir Path tempDir) throws IOException {
    // Given
    // Build three GroupPartitioners containing partition keys covering a mix of
    // primitive types (long, int, String, float, short, byte, byte[]).
    final Object[] key1 = new Object[] { 1L, 5, "ABC", 10F, (short) 1, (byte) 64, new byte[] { (byte) 1, (byte) 2, (byte) 3 } };
    final PartitionKey partitionKey1 = new PartitionKey(key1);
    final Object[] key2 = new Object[] { 100L, 500, "XYZ", 1000F, (short) 3, (byte) 55, new byte[] { (byte) 10, (byte) 9, (byte) 8, (byte) 7 } };
    final PartitionKey partitionKey2 = new PartitionKey(key2);
    final List<PartitionKey> splitPoints1 = new ArrayList<>();
    splitPoints1.add(partitionKey1);
    splitPoints1.add(partitionKey2);
    // NOTE(review): group name now matches the name this partitioner is registered
    // under below ("GROUP1"); it was previously the inconsistent "GROUP".
    final GroupPartitioner groupPartitioner1 = new GroupPartitioner("GROUP1", splitPoints1);
    final Object[] key3 = new Object[] { 1000L, 5000, "ABCDEF", 10000F, (short) 19, (byte) 20, new byte[] { (byte) 4, (byte) 5, (byte) 6 } };
    final PartitionKey partitionKey3 = new PartitionKey(key3);
    final Object[] key4 = new Object[] { 100000L, 500000, "XYZZZZ", 100000F, (short) 32, (byte) 58, new byte[] { (byte) 20, (byte) 29, (byte) 28, (byte) 27 } };
    final PartitionKey partitionKey4 = new PartitionKey(key4);
    final List<PartitionKey> splitPoints2 = new ArrayList<>();
    splitPoints2.add(partitionKey3);
    splitPoints2.add(partitionKey4);
    final GroupPartitioner groupPartitioner2 = new GroupPartitioner("GROUP2", splitPoints2);
    final Object[] key5 = new Object[] { 10000000L, 5000000, "ABCDEFGHI", 100000F, (short) 21, (byte) 30, new byte[] { (byte) 10, (byte) 11, (byte) 12 } };
    final PartitionKey partitionKey5 = new PartitionKey(key5);
    final Object[] key6 = new Object[] { 100000000L, 5000, "ABCDEF", 10000F, (short) 19, (byte) 33, new byte[] { (byte) 13, (byte) 14, (byte) 15 } };
    final PartitionKey partitionKey6 = new PartitionKey(key6);
    final List<PartitionKey> splitPoints3 = new ArrayList<>();
    splitPoints3.add(partitionKey5);
    splitPoints3.add(partitionKey6);
    final GroupPartitioner groupPartitioner3 = new GroupPartitioner("GROUP1", splitPoints3);
    final GraphPartitioner graphPartitioner = new GraphPartitioner();
    graphPartitioner.addGroupPartitioner("GROUP1", groupPartitioner1);
    graphPartitioner.addGroupPartitioner("GROUP2", groupPartitioner2);
    graphPartitioner.addGroupPartitionerForReversedEdges("GROUP1", groupPartitioner3);
    final GraphPartitionerSerialiser serialiser = new GraphPartitionerSerialiser();
    // When
    // Round-trip the partitioner through a file; try-with-resources ensures the
    // streams are closed even if serialisation fails.
    final String filename = tempDir.resolve("test").toString();
    try (DataOutputStream dos = new DataOutputStream(new FileOutputStream(filename))) {
        serialiser.write(graphPartitioner, dos);
    }
    final GraphPartitioner readGraphPartitioner;
    try (DataInputStream dis = new DataInputStream(new FileInputStream(filename))) {
        readGraphPartitioner = serialiser.read(dis);
    }
    // Then
    assertEquals(graphPartitioner, readGraphPartitioner);
}
Use of uk.gov.gchq.gaffer.parquetstore.partitioner.GraphPartitioner in project Gaffer (by gchq):
class CalculatePartitioner, method call.
/**
 * Builds a {@link GraphPartitioner} by reading the first element of every sorted
 * Parquet file (except the first) in each group's directory: those first keys are
 * the partition split points. Reversed-edge partitioners are computed the same way
 * from the reversed-edge directories of each edge group.
 *
 * @return the calculated GraphPartitioner
 * @throws IOException if a file cannot be read, or a non-first file is unexpectedly empty
 */
public GraphPartitioner call() throws IOException {
    final SchemaUtils schemaUtils = new SchemaUtils(schema);
    final GraphPartitioner graphPartitioner = new GraphPartitioner();
    for (final String group : schema.getGroups()) {
        LOGGER.info("Calculating GroupPartitioner for group {}", group);
        final GafferGroupObjectConverter converter = schemaUtils.getConverter(group);
        final List<PartitionKey> partitionKeys = new ArrayList<>();
        final Path groupPath = new Path(path, ParquetStore.getGroupSubDir(group, false));
        final FileStatus[] files = fs.listStatus(groupPath, p -> p.getName().endsWith(".parquet"));
        // Sort the files by path so the partition keys come out in partition order.
        final SortedSet<Path> sortedFiles = new TreeSet<>();
        Arrays.stream(files).map(f -> f.getPath()).forEach(sortedFiles::add);
        final Path[] sortedPaths = sortedFiles.toArray(new Path[] {});
        LOGGER.debug("Found {} files in {}", files.length, groupPath);
        for (int i = 1; i < sortedPaths.length; i++) {
            // NB Skip first file - the first partition has no lower bound.
            LOGGER.debug("Reading first line of {}", sortedPaths[i]);
            // try-with-resources so the reader is closed even when we throw below.
            try (ParquetReader<Element> reader = new ParquetElementReader.Builder<Element>(sortedPaths[i]).isEntity(schema.getEntityGroups().contains(group)).usingConverter(converter).build()) {
                // NB Should never be null as empty files are removed before this is called
                final Element element = reader.read();
                if (null == element) {
                    // Report the file that was actually read (sortedPaths order
                    // differs from the unsorted files[] order).
                    throw new IOException("No first element in file " + sortedPaths[i] + " - empty files are supposed to be removed");
                }
                final Object[] parquetObjects = converter.corePropertiesToParquetObjects(element);
                partitionKeys.add(new PartitionKey(parquetObjects));
            }
        }
        final GroupPartitioner groupPartitioner = new GroupPartitioner(group, partitionKeys);
        graphPartitioner.addGroupPartitioner(group, groupPartitioner);
        LOGGER.info("GroupPartitioner for group {} is {}", group, groupPartitioner);
    }
    for (final String group : schema.getEdgeGroups()) {
        LOGGER.info("Calculating GroupPartitioner for reversed edge group {}", group);
        final GafferGroupObjectConverter converter = schemaUtils.getConverter(group);
        final List<PartitionKey> partitionKeys = new ArrayList<>();
        final Path groupPath = new Path(path, ParquetStore.getGroupSubDir(group, true));
        final FileStatus[] files = fs.listStatus(groupPath, p -> p.getName().endsWith(".parquet"));
        final SortedSet<Path> sortedFiles = new TreeSet<>();
        Arrays.stream(files).map(f -> f.getPath()).forEach(sortedFiles::add);
        final Path[] sortedPaths = sortedFiles.toArray(new Path[] {});
        LOGGER.debug("Found {} files in {}", files.length, groupPath);
        for (int i = 1; i < sortedPaths.length; i++) {
            // NB Skip first file - the first partition has no lower bound.
            LOGGER.debug("Reading first line of {}", sortedPaths[i]);
            try (ParquetReader<Element> reader = new ParquetElementReader.Builder<Element>(sortedPaths[i]).isEntity(false).usingConverter(converter).build()) {
                final Edge edge = (Edge) reader.read();
                if (null == edge) {
                    throw new IOException("No first edge in file " + sortedPaths[i] + " - empty files are supposed to be removed");
                }
                // Reversed edges are keyed by (destination, source, directed).
                final Object[] parquetObjects = converter.corePropertiesToParquetObjectsForReversedEdge(edge);
                partitionKeys.add(new PartitionKey(parquetObjects));
            }
        }
        final GroupPartitioner groupPartitioner = new GroupPartitioner(group, partitionKeys);
        graphPartitioner.addGroupPartitionerForReversedEdges(group, groupPartitioner);
    }
    return graphPartitioner;
}
Aggregations