Example use of org.junit.jupiter.api.io.TempDir in project Gaffer by gchq.
Class WriteUnsortedDataTest, method testNoSplitPointsCase.
/**
 * Writes the sample data through a {@link WriteUnsortedData} whose partitioners have no
 * split points, then checks that every group (and every reversed edge group) ends up in a
 * single {@code split-0} directory holding one file with the expected contents.
 *
 * @param tempDir temporary directory under which all output is written
 * @throws IOException        declared by the writer and the verification helpers
 * @throws OperationException declared by the writer and the verification helpers
 */
@Test
public void testNoSplitPointsCase(@TempDir java.nio.file.Path tempDir) throws IOException, OperationException {
    // Given
    final String tempFilesDir = tempDir.toAbsolutePath().toString();
    final String groupDirPrefix = tempFilesDir + "/GROUP=";
    final String reversedGroupDirPrefix = tempFilesDir + "/REVERSED-GROUP=";
    final String onlySplit = "/split-0";
    final SchemaUtils schemaUtils = new SchemaUtils(TestUtils.gafferSchema("schemaUsingLongVertexType"));

    final String[] allGroups = {TestGroups.ENTITY, TestGroups.ENTITY_2, TestGroups.EDGE, TestGroups.EDGE_2};
    final String[] edgeGroups = {TestGroups.EDGE, TestGroups.EDGE_2};

    // An empty list of split points is registered for every group, forward and reversed.
    final GraphPartitioner graphPartitioner = new GraphPartitioner();
    for (final String groupName : allGroups) {
        graphPartitioner.addGroupPartitioner(groupName, new GroupPartitioner(groupName, new ArrayList<>()));
    }
    for (final String groupName : edgeGroups) {
        graphPartitioner.addGroupPartitionerForReversedEdges(groupName, new GroupPartitioner(groupName, new ArrayList<>()));
    }

    final List<Element> elements = getData(3L);
    final BiFunction<String, Integer, String> fileNameForGroupAndPartitionId =
            (group, partitionId) -> groupDirPrefix + group + "/split-" + partitionId;
    final BiFunction<String, Integer, String> fileNameForGroupAndPartitionIdForReversedEdge =
            (group, partitionId) -> reversedGroupDirPrefix + group + "/split-" + partitionId;
    final WriteUnsortedData writeUnsortedData = new WriteUnsortedData(
            tempFilesDir,
            CompressionCodecName.GZIP,
            schemaUtils,
            graphPartitioner,
            fileNameForGroupAndPartitionId,
            fileNameForGroupAndPartitionIdForReversedEdge);

    // When
    writeUnsortedData.writeElements(elements);

    // Then
    // - Each directory should exist and contain one file
    for (final String groupName : allGroups) {
        testExistsAndContainsNFiles(groupDirPrefix + groupName + onlySplit, 1);
    }
    for (final String groupName : edgeGroups) {
        testExistsAndContainsNFiles(reversedGroupDirPrefix + groupName + onlySplit, 1);
    }

    // - Each file should contain the data that was written to it, in the order it was in the iterable
    for (final String groupName : allGroups) {
        testContainsCorrectDataNoSplitPoints(groupName, groupDirPrefix + groupName + onlySplit, elements, schemaUtils);
    }
    testContainsCorrectDataNoSplitPoints(TestGroups.EDGE, reversedGroupDirPrefix + TestGroups.EDGE + onlySplit, elements, schemaUtils);

    // For the reversed EDGE_2 file the expectation is narrowed to EDGE_2 edges whose
    // source differs from their destination.
    final List<Element> elementsWithSameSrcDstRemoved = new ArrayList<>();
    for (final Element element : elements) {
        if (element.getGroup().equals(TestGroups.EDGE_2)) {
            final Edge edge = (Edge) element;
            if (!edge.getSource().equals(edge.getDestination())) {
                elementsWithSameSrcDstRemoved.add(edge);
            }
        }
    }
    testContainsCorrectDataNoSplitPoints(TestGroups.EDGE_2, reversedGroupDirPrefix + TestGroups.EDGE_2 + onlySplit, elementsWithSameSrcDstRemoved, schemaUtils);
}
Example use of org.junit.jupiter.api.io.TempDir in project Gaffer by gchq.
Class WriteDataTest, method testTwoWritesToSamePartitionDoesntThrowException.
/**
 * Runs three concurrent {@code WriteData.call} invocations that all target partition id 1
 * (each with a distinct third argument, 1000 to 1002) and verifies that none of them fails
 * and that the per-group {@code input-1.parquet} files exist afterwards.
 *
 * @param tempDir temporary directory under which the per-group directories are created
 * @throws Exception if any concurrent write fails or the file system cannot be queried
 */
@Test
public void testTwoWritesToSamePartitionDoesntThrowException(@TempDir java.nio.file.Path tempDir) throws Exception {
    // Given
    final Schema schema = new Schema.Builder()
            .type("int", new TypeDefinition.Builder()
                    .clazz(Integer.class)
                    .serialiser(new IntegerParquetSerialiser())
                    .build())
            .type("string", new TypeDefinition.Builder()
                    .clazz(String.class)
                    .serialiser(new StringParquetSerialiser())
                    .build())
            .entity("entity", new SchemaEntityDefinition.Builder()
                    .vertex("string")
                    .property("property1", "int")
                    .aggregate(false)
                    .build())
            .edge("edge", new SchemaEdgeDefinition.Builder()
                    .source("string")
                    .destination("string")
                    .property("property2", "int")
                    .aggregate(false)
                    .build())
            .vertexSerialiser(new StringParquetSerialiser())
            .build();
    final Function<String, String> groupToDirectory = group -> tempDir.toAbsolutePath().toString() + "/" + group;
    final List<Element> elements = new ArrayList<>();
    elements.add(new Entity.Builder().group("entity").vertex("A").property("property1", 1).build());
    elements.add(new Edge.Builder().group("edge").source("B").dest("C").property("property2", 100).build());
    final WriteData writeData = new WriteData(groupToDirectory, schema, CompressionCodecName.GZIP);
    final FileSystem fileSystem = FileSystem.get(new Configuration());

    // When
    final ExecutorService executorService = Executors.newFixedThreadPool(3);
    try {
        final List<Callable<Void>> tasks = new ArrayList<>();
        LongStream.range(1000L, 1003L).forEach(l -> {
            tasks.add(() -> {
                writeData.call(elements.iterator(), 1, l);
                return null;
            });
        });
        // invokeAll only waits for completion; a failure inside a task is stored in its
        // Future. Calling get() rethrows it (wrapped in ExecutionException) so that a
        // failing concurrent write actually fails this test.
        for (final java.util.concurrent.Future<Void> result : executorService.invokeAll(tasks)) {
            result.get();
        }
    } finally {
        // Release the pool's worker threads whether or not the writes succeeded.
        executorService.shutdown();
    }

    // Then
    // - Check that a file named with the partition id has been created
    assertTrue(fileSystem.exists(new Path(groupToDirectory.apply("entity") + "/" + "input-1.parquet")));
    assertTrue(fileSystem.exists(new Path(groupToDirectory.apply("edge") + "/" + "input-1.parquet")));
}
Example use of org.junit.jupiter.api.io.TempDir in project flink by apache.
Class TaskExecutorRecoveryTest, method testRecoveredTaskExecutorWillRestoreAllocationState.
/**
 * Starts a TaskExecutor with two slots and local recovery enabled, allocates one slot for a
 * job, closes the executor, and then starts a second TaskExecutor with the same resource id,
 * configuration and working directory. The test asserts that the second executor's slot
 * report shows the previously allocated slot with the same job id and allocation id, and
 * that exactly one slot carrying that allocation id is offered to the job master.
 *
 * @param tempDir temporary directory used to create the shared working directory
 * @throws Exception if any step of the scenario fails
 */
@Test
public void testRecoveredTaskExecutorWillRestoreAllocationState(@TempDir File tempDir) throws Exception {
final ResourceID resourceId = ResourceID.generate();
final Configuration configuration = new Configuration();
configuration.set(TaskManagerOptions.NUM_TASK_SLOTS, 2);
configuration.set(CheckpointingOptions.LOCAL_RECOVERY, true);
final TestingResourceManagerGateway testingResourceManagerGateway = new TestingResourceManagerGateway();
// Every slot report sent to the resource manager gateway is captured here; capacity 2
// matches the two reports consumed below (one per started executor).
final ArrayBlockingQueue<TaskExecutorSlotReport> queue = new ArrayBlockingQueue<>(2);
testingResourceManagerGateway.setSendSlotReportFunction(slotReportInformation -> {
queue.offer(TaskExecutorSlotReport.create(slotReportInformation.f0, slotReportInformation.f2));
return CompletableFuture.completedFuture(Acknowledge.get());
});
final TestingRpcService rpcService = rpcServiceExtension.getTestingRpcService();
rpcService.registerGateway(testingResourceManagerGateway.getAddress(), testingResourceManagerGateway);
final JobID jobId = new JobID();
final TestingHighAvailabilityServices highAvailabilityServices = new TestingHighAvailabilityServices();
highAvailabilityServices.setResourceManagerLeaderRetriever(new SettableLeaderRetrievalService(testingResourceManagerGateway.getAddress(), testingResourceManagerGateway.getFencingToken().toUUID()));
// The job master leader is deliberately left unset at this point; it is only announced
// (via notifyListener below) after the first executor has been closed.
final SettableLeaderRetrievalService jobMasterLeaderRetriever = new SettableLeaderRetrievalService();
highAvailabilityServices.setJobMasterLeaderRetriever(jobId, jobMasterLeaderRetriever);
// Both executors are built on this same working directory.
final WorkingDirectory workingDirectory = WorkingDirectory.create(tempDir);
final TaskExecutor taskExecutor = TaskExecutorBuilder.newBuilder(rpcService, highAvailabilityServices, workingDirectory).setConfiguration(configuration).setResourceId(resourceId).build();
taskExecutor.start();
final TaskExecutorGateway taskExecutorGateway = taskExecutor.getSelfGateway(TaskExecutorGateway.class);
// Blocks until the first executor has sent its slot report.
final TaskExecutorSlotReport taskExecutorSlotReport = queue.take();
final SlotReport slotReport = taskExecutorSlotReport.getSlotReport();
assertThat(slotReport.getNumSlotStatus(), is(2));
// Allocate the first reported slot for the job and wait for the request to complete.
final SlotStatus slotStatus = slotReport.iterator().next();
final SlotID allocatedSlotID = slotStatus.getSlotID();
final AllocationID allocationId = new AllocationID();
taskExecutorGateway.requestSlot(allocatedSlotID, jobId, allocationId, slotStatus.getResourceProfile(), "localhost", testingResourceManagerGateway.getFencingToken(), Time.seconds(10L)).join();
taskExecutor.close();
// Job master gateway that records the slots offered to it and accepts all of them.
final BlockingQueue<Collection<SlotOffer>> offeredSlots = new ArrayBlockingQueue<>(1);
final TestingJobMasterGateway jobMasterGateway = new TestingJobMasterGatewayBuilder().setOfferSlotsFunction((resourceID, slotOffers) -> {
offeredSlots.offer(new HashSet<>(slotOffers));
return CompletableFuture.completedFuture(slotOffers);
}).build();
rpcService.registerGateway(jobMasterGateway.getAddress(), jobMasterGateway);
jobMasterLeaderRetriever.notifyListener(jobMasterGateway.getAddress(), jobMasterGateway.getFencingToken().toUUID());
// recover the TaskExecutor
// NOTE(review): recoveredTaskExecutor is never closed in this method — confirm that the
// surrounding test fixture (e.g. the RPC service extension) cleans it up.
final TaskExecutor recoveredTaskExecutor = TaskExecutorBuilder.newBuilder(rpcService, highAvailabilityServices, workingDirectory).setConfiguration(configuration).setResourceId(resourceId).build();
recoveredTaskExecutor.start();
// Blocks until the recovered executor has sent its slot report.
final TaskExecutorSlotReport recoveredSlotReport = queue.take();
// The previously allocated slot must still carry the job and allocation ids; every other
// slot must have no job id.
for (SlotStatus status : recoveredSlotReport.getSlotReport()) {
if (status.getSlotID().equals(allocatedSlotID)) {
assertThat(status.getJobID(), is(jobId));
assertThat(status.getAllocationID(), is(allocationId));
} else {
assertThat(status.getJobID(), is(nullValue()));
}
}
// Exactly one slot, with the original allocation id, must have been offered to the job master.
final Collection<SlotOffer> take = offeredSlots.take();
assertThat(take, hasSize(1));
final SlotOffer offeredSlot = take.iterator().next();
assertThat(offeredSlot.getAllocationId(), is(allocationId));
}
Example use of org.junit.jupiter.api.io.TempDir in project Gaffer by gchq.
Class WriteUnsortedDataTest, method testMultipleSplitPointsCase.
/**
 * Writes elements through a {@link WriteUnsortedData} configured with two split points per
 * group (and per reversed edge group), then checks that the {@code split-0}, {@code split-1}
 * and {@code split-2} outputs hold the elements that fall between the corresponding
 * partition keys.
 *
 * @param tempDir temporary directory under which all output is written
 * @throws IOException        declared by the writer and the verification helpers
 * @throws OperationException declared by the writer and the verification helpers
 */
@Test
public void testMultipleSplitPointsCase(@TempDir java.nio.file.Path tempDir) throws IOException, OperationException {
    // Given
    final String tempFilesDir = tempDir.toAbsolutePath().toString();
    final String groupDirPrefix = tempFilesDir + "/GROUP=";
    final String reversedGroupDirPrefix = tempFilesDir + "/REVERSED-GROUP=";
    final SchemaUtils schemaUtils = new SchemaUtils(TestUtils.gafferSchema("schemaUsingLongVertexType"));
    final GraphPartitioner graphPartitioner = new GraphPartitioner();
    final List<Element> elements = new ArrayList<>();

    // TestGroups.ENTITY: split points 10L and 100L.
    // Vertices written, in order: 5, 10, 10, 11, 12, 100, 100, 200.
    final List<PartitionKey> splitPointsEntity = new ArrayList<>(Arrays.asList(
            new PartitionKey(new Object[] {10L}),
            new PartitionKey(new Object[] {100L})));
    graphPartitioner.addGroupPartitioner(TestGroups.ENTITY, new GroupPartitioner(TestGroups.ENTITY, splitPointsEntity));
    for (final long vertex : new long[] {5L, 10L, 10L, 11L, 12L, 100L, 100L, 200L}) {
        elements.add(createEntityForEntityGroup(vertex));
    }

    // TestGroups.ENTITY_2: split points 100L and 1000L.
    // Vertices written, in order: 5, 100, 200, 1000, 5000.
    final List<PartitionKey> splitPointsEntity_2 = new ArrayList<>(Arrays.asList(
            new PartitionKey(new Object[] {100L}),
            new PartitionKey(new Object[] {1000L})));
    graphPartitioner.addGroupPartitioner(TestGroups.ENTITY_2, new GroupPartitioner(TestGroups.ENTITY_2, splitPointsEntity_2));
    for (final long vertex : new long[] {5L, 100L, 200L, 1000L, 5000L}) {
        elements.add(createEntityForEntityGroup_2(vertex));
    }

    // TestGroups.EDGE: split points [1000L, 200L, true] and [1000L, 30000L, false];
    // reversed-edge split points [100L, 1000L, true] and [300L, 2000L, false].
    // Edges written, in order (SOURCE, DESTINATION, DIRECTED):
    //     5L      5000L    true
    //     5L      200L     false
    //     1000L   90L      true
    //     1000L   10000L   false
    //     1000L   30000L   false
    //     1000L   300000L  true
    //     10000L  400L     false
    final List<PartitionKey> splitPointsEdge = new ArrayList<>(Arrays.asList(
            new PartitionKey(new Object[] {1000L, 200L, true}),
            new PartitionKey(new Object[] {1000L, 30000L, false})));
    graphPartitioner.addGroupPartitioner(TestGroups.EDGE, new GroupPartitioner(TestGroups.EDGE, splitPointsEdge));
    final List<PartitionKey> splitPointsReversedEdge = new ArrayList<>(Arrays.asList(
            new PartitionKey(new Object[] {100L, 1000L, true}),
            new PartitionKey(new Object[] {300L, 2000L, false})));
    graphPartitioner.addGroupPartitionerForReversedEdges(TestGroups.EDGE, new GroupPartitioner(TestGroups.EDGE, splitPointsReversedEdge));
    elements.add(createEdgeForEdgeGroup(5L, 5000L, true));
    elements.add(createEdgeForEdgeGroup(5L, 200L, false));
    elements.add(createEdgeForEdgeGroup(1000L, 90L, true));
    elements.add(createEdgeForEdgeGroup(1000L, 10000L, false));
    elements.add(createEdgeForEdgeGroup(1000L, 30000L, false));
    elements.add(createEdgeForEdgeGroup(1000L, 300000L, true));
    elements.add(createEdgeForEdgeGroup(10000L, 400L, false));

    // TestGroups.EDGE_2: split points [10L, 2000L, true] and [100L, 1000L, false];
    // reversed-edge split points [1000L, 1500L, true] and [2000L, 2500L, false].
    // Edges written, in order (SOURCE, DESTINATION, DIRECTED):
    //     5L    5000L  true
    //     10L   2000L  false
    //     10L   2000L  true
    //     10L   3000L  false
    //     100L  1000L  false
    //     100L  3000L  false
    //     100L  3000L  true
    final List<PartitionKey> splitPointsEdge_2 = new ArrayList<>(Arrays.asList(
            new PartitionKey(new Object[] {10L, 2000L, true}),
            new PartitionKey(new Object[] {100L, 1000L, false})));
    graphPartitioner.addGroupPartitioner(TestGroups.EDGE_2, new GroupPartitioner(TestGroups.EDGE_2, splitPointsEdge_2));
    final List<PartitionKey> splitPointsReversedEdge_2 = new ArrayList<>(Arrays.asList(
            new PartitionKey(new Object[] {1000L, 1500L, true}),
            new PartitionKey(new Object[] {2000L, 2500L, false})));
    graphPartitioner.addGroupPartitionerForReversedEdges(TestGroups.EDGE_2, new GroupPartitioner(TestGroups.EDGE_2, splitPointsReversedEdge_2));
    elements.add(createEdgeForEdgeGroup_2(5L, 5000L, true));
    elements.add(createEdgeForEdgeGroup_2(10L, 2000L, false));
    elements.add(createEdgeForEdgeGroup_2(10L, 2000L, true));
    elements.add(createEdgeForEdgeGroup_2(10L, 3000L, false));
    elements.add(createEdgeForEdgeGroup_2(100L, 1000L, false));
    elements.add(createEdgeForEdgeGroup_2(100L, 3000L, false));
    elements.add(createEdgeForEdgeGroup_2(100L, 3000L, true));

    final BiFunction<String, Integer, String> fileNameForGroupAndPartitionId =
            (group, partitionId) -> groupDirPrefix + group + "/split-" + partitionId;
    final BiFunction<String, Integer, String> fileNameForGroupAndPartitionIdForReversedEdge =
            (group, partitionId) -> reversedGroupDirPrefix + group + "/split-" + partitionId;
    final WriteUnsortedData writeUnsortedData = new WriteUnsortedData(
            tempFilesDir,
            CompressionCodecName.GZIP,
            schemaUtils,
            graphPartitioner,
            fileNameForGroupAndPartitionId,
            fileNameForGroupAndPartitionIdForReversedEdge);

    // When
    writeUnsortedData.writeElements(elements);

    // Then
    // - For each group, directories split-0, split-1 and split-2 should exist and each contain one file
    final String[] allGroups = {TestGroups.ENTITY, TestGroups.ENTITY_2, TestGroups.EDGE, TestGroups.EDGE_2};
    for (final String groupName : allGroups) {
        for (int split = 0; split <= 2; split++) {
            testExistsAndContainsNFiles(groupDirPrefix + groupName + "/split-" + split, 1);
        }
    }

    // - Each split file should contain the data for that split in the order it was written
    for (final String group : new HashSet<>(Arrays.asList(TestGroups.ENTITY, TestGroups.ENTITY_2))) {
        final GroupPartitioner partitioner = graphPartitioner.getGroupPartitioner(group);
        final String groupDir = groupDirPrefix + group;
        testSplitFileContainsCorrectData(groupDir + "/split-0", group, true, false,
                null, partitioner.getIthPartitionKey(0), elements, schemaUtils);
        testSplitFileContainsCorrectData(groupDir + "/split-1", group, true, false,
                partitioner.getIthPartitionKey(0), partitioner.getIthPartitionKey(1), elements, schemaUtils);
        testSplitFileContainsCorrectData(groupDir + "/split-2", group, true, false,
                partitioner.getIthPartitionKey(1), null, elements, schemaUtils);
    }
    for (final String group : new HashSet<>(Arrays.asList(TestGroups.EDGE, TestGroups.EDGE_2))) {
        final GroupPartitioner forward = graphPartitioner.getGroupPartitioner(group);
        final GroupPartitioner reversed = graphPartitioner.getGroupPartitionerForReversedEdges(group);
        final String groupDir = groupDirPrefix + group;
        final String reversedGroupDir = reversedGroupDirPrefix + group;
        testSplitFileContainsCorrectData(groupDir + "/split-0", group, false, false,
                null, forward.getIthPartitionKey(0), elements, schemaUtils);
        testSplitFileContainsCorrectData(reversedGroupDir + "/split-0", group, false, true,
                null, reversed.getIthPartitionKey(0), elements, schemaUtils);
        testSplitFileContainsCorrectData(groupDir + "/split-1", group, false, false,
                forward.getIthPartitionKey(0), forward.getIthPartitionKey(1), elements, schemaUtils);
        testSplitFileContainsCorrectData(reversedGroupDir + "/split-1", group, false, true,
                reversed.getIthPartitionKey(0), reversed.getIthPartitionKey(1), elements, schemaUtils);
        testSplitFileContainsCorrectData(groupDir + "/split-2", group, false, false,
                forward.getIthPartitionKey(1), null, elements, schemaUtils);
        testSplitFileContainsCorrectData(reversedGroupDir + "/split-2", group, false, true,
                reversed.getIthPartitionKey(1), null, elements, schemaUtils);
    }
}
Example use of org.junit.jupiter.api.io.TempDir in project Gaffer by gchq.
Class WriteUnsortedDataTest, method testOneSplitPointCase.
/**
 * Writes elements through a {@link WriteUnsortedData} configured with a single split point
 * per group (and per reversed edge group), then checks that the {@code split-0} and
 * {@code split-1} outputs exist, contain one file each, and hold the elements that fall on
 * the corresponding side of the partition key.
 *
 * @param tempDir temporary directory under which all output is written
 * @throws IOException        declared by the writer and the verification helpers
 * @throws OperationException declared by the writer and the verification helpers
 */
@Test
public void testOneSplitPointCase(@TempDir java.nio.file.Path tempDir) throws IOException, OperationException {
    // Given
    final String tempFilesDir = tempDir.toAbsolutePath().toString();
    final SchemaUtils schemaUtils = new SchemaUtils(TestUtils.gafferSchema("schemaUsingLongVertexType"));
    final GraphPartitioner graphPartitioner = new GraphPartitioner();
    final List<Element> elements = new ArrayList<>();

    // TestGroups.ENTITY, split point is 10L. Create data with
    //     VERTEX
    //       5L
    //      10L
    //      10L
    //      10L
    //      20L
    final List<PartitionKey> splitPointsEntity = new ArrayList<>();
    splitPointsEntity.add(new PartitionKey(new Object[] {10L}));
    graphPartitioner.addGroupPartitioner(TestGroups.ENTITY, new GroupPartitioner(TestGroups.ENTITY, splitPointsEntity));
    elements.add(createEntityForEntityGroup(5L));
    elements.add(createEntityForEntityGroup(10L));
    elements.add(createEntityForEntityGroup(10L));
    elements.add(createEntityForEntityGroup(10L));
    elements.add(createEntityForEntityGroup(20L));

    // TestGroups.ENTITY_2, split point is 100L. Create data with
    //     VERTEX
    //       5L
    //     100L
    //    1000L
    final List<PartitionKey> splitPointsEntity_2 = new ArrayList<>();
    splitPointsEntity_2.add(new PartitionKey(new Object[] {100L}));
    graphPartitioner.addGroupPartitioner(TestGroups.ENTITY_2, new GroupPartitioner(TestGroups.ENTITY_2, splitPointsEntity_2));
    elements.add(createEntityForEntityGroup_2(5L));
    elements.add(createEntityForEntityGroup_2(100L));
    elements.add(createEntityForEntityGroup_2(1000L));

    // TestGroups.EDGE, split point is [1000L, 200L, true], reversed-edge split point is
    // [1000L, 300L, true]. Create data with
    //     SOURCE  DESTINATION  DIRECTED
    //       5L      5000L       true
    //       5L       200L       false
    //    1000L       100L       true
    //    1000L       200L       false
    //    1000L       200L       true
    //    1000L       300L       true
    //   10000L       400L       false
    //   10000L       400L       true
    final List<PartitionKey> splitPointsEdge = new ArrayList<>();
    splitPointsEdge.add(new PartitionKey(new Object[] {1000L, 200L, true}));
    graphPartitioner.addGroupPartitioner(TestGroups.EDGE, new GroupPartitioner(TestGroups.EDGE, splitPointsEdge));
    final List<PartitionKey> splitPointsReversedEdge = new ArrayList<>();
    splitPointsReversedEdge.add(new PartitionKey(new Object[] {1000L, 300L, true}));
    graphPartitioner.addGroupPartitionerForReversedEdges(TestGroups.EDGE, new GroupPartitioner(TestGroups.EDGE, splitPointsReversedEdge));
    elements.add(createEdgeForEdgeGroup(5L, 5000L, true));
    elements.add(createEdgeForEdgeGroup(5L, 200L, false));
    elements.add(createEdgeForEdgeGroup(1000L, 100L, true));
    elements.add(createEdgeForEdgeGroup(1000L, 200L, false));
    elements.add(createEdgeForEdgeGroup(1000L, 200L, true));
    elements.add(createEdgeForEdgeGroup(1000L, 300L, true));
    elements.add(createEdgeForEdgeGroup(10000L, 400L, false));
    elements.add(createEdgeForEdgeGroup(10000L, 400L, true));

    // TestGroups.EDGE_2, split point is [10L, 2000L, true], reversed-edge split point is
    // [3000L, 20L, true]. The data below sits directly on either side of both split points
    // (source 10L / destination 2000L for the forward key, destination 3000L with sources
    // 10L and 100L for the reversed key). Create data with
    //     SOURCE  DESTINATION  DIRECTED
    //       5L      5000L       true
    //      10L      2000L       false
    //      10L      2000L       true
    //      10L      3000L       false
    //     100L      1000L       true
    //     100L      3000L       false
    //     100L      3000L       true
    final List<PartitionKey> splitPointsEdge_2 = new ArrayList<>();
    splitPointsEdge_2.add(new PartitionKey(new Object[] {10L, 2000L, true}));
    graphPartitioner.addGroupPartitioner(TestGroups.EDGE_2, new GroupPartitioner(TestGroups.EDGE_2, splitPointsEdge_2));
    final List<PartitionKey> splitPointsReversedEdge_2 = new ArrayList<>();
    splitPointsReversedEdge_2.add(new PartitionKey(new Object[] {3000L, 20L, true}));
    graphPartitioner.addGroupPartitionerForReversedEdges(TestGroups.EDGE_2, new GroupPartitioner(TestGroups.EDGE_2, splitPointsReversedEdge_2));
    elements.add(createEdgeForEdgeGroup_2(5L, 5000L, true));
    elements.add(createEdgeForEdgeGroup_2(10L, 2000L, false));
    elements.add(createEdgeForEdgeGroup_2(10L, 2000L, true));
    elements.add(createEdgeForEdgeGroup_2(10L, 3000L, false));
    elements.add(createEdgeForEdgeGroup_2(100L, 1000L, true));
    elements.add(createEdgeForEdgeGroup_2(100L, 3000L, false));
    elements.add(createEdgeForEdgeGroup_2(100L, 3000L, true));

    final BiFunction<String, Integer, String> fileNameForGroupAndPartitionId =
            (group, partitionId) -> tempFilesDir + "/GROUP=" + group + "/split-" + partitionId;
    final BiFunction<String, Integer, String> fileNameForGroupAndPartitionIdForReversedEdge =
            (group, partitionId) -> tempFilesDir + "/REVERSED-GROUP=" + group + "/split-" + partitionId;
    final WriteUnsortedData writeUnsortedData = new WriteUnsortedData(
            tempFilesDir,
            CompressionCodecName.GZIP,
            schemaUtils,
            graphPartitioner,
            fileNameForGroupAndPartitionId,
            fileNameForGroupAndPartitionIdForReversedEdge);

    // When
    writeUnsortedData.writeElements(elements);

    // Then
    // - For each group, directories split-0 and split-1 should exist and each contain one file
    testExistsAndContainsNFiles(tempFilesDir + "/GROUP=" + TestGroups.ENTITY + "/split-0", 1);
    testExistsAndContainsNFiles(tempFilesDir + "/GROUP=" + TestGroups.ENTITY + "/split-1", 1);
    testExistsAndContainsNFiles(tempFilesDir + "/GROUP=" + TestGroups.ENTITY_2 + "/split-0", 1);
    testExistsAndContainsNFiles(tempFilesDir + "/GROUP=" + TestGroups.ENTITY_2 + "/split-1", 1);
    testExistsAndContainsNFiles(tempFilesDir + "/GROUP=" + TestGroups.EDGE + "/split-0", 1);
    testExistsAndContainsNFiles(tempFilesDir + "/GROUP=" + TestGroups.EDGE + "/split-1", 1);
    testExistsAndContainsNFiles(tempFilesDir + "/GROUP=" + TestGroups.EDGE_2 + "/split-0", 1);
    testExistsAndContainsNFiles(tempFilesDir + "/GROUP=" + TestGroups.EDGE_2 + "/split-1", 1);
    testExistsAndContainsNFiles(tempFilesDir + "/REVERSED-GROUP=" + TestGroups.EDGE + "/split-0", 1);
    testExistsAndContainsNFiles(tempFilesDir + "/REVERSED-GROUP=" + TestGroups.EDGE + "/split-1", 1);
    testExistsAndContainsNFiles(tempFilesDir + "/REVERSED-GROUP=" + TestGroups.EDGE_2 + "/split-0", 1);
    testExistsAndContainsNFiles(tempFilesDir + "/REVERSED-GROUP=" + TestGroups.EDGE_2 + "/split-1", 1);

    // - Each split file should contain the data for that split in the order it was written
    for (final String group : new HashSet<>(Arrays.asList(TestGroups.ENTITY, TestGroups.ENTITY_2))) {
        testSplitFileContainsCorrectData(tempFilesDir + "/GROUP=" + group + "/split-0", group, true, false, null, graphPartitioner.getGroupPartitioner(group).getIthPartitionKey(0), elements, schemaUtils);
        testSplitFileContainsCorrectData(tempFilesDir + "/GROUP=" + group + "/split-1", group, true, false, graphPartitioner.getGroupPartitioner(group).getIthPartitionKey(0), null, elements, schemaUtils);
    }
    for (final String group : new HashSet<>(Arrays.asList(TestGroups.EDGE, TestGroups.EDGE_2))) {
        testSplitFileContainsCorrectData(tempFilesDir + "/GROUP=" + group + "/split-0", group, false, false, null, graphPartitioner.getGroupPartitioner(group).getIthPartitionKey(0), elements, schemaUtils);
        testSplitFileContainsCorrectData(tempFilesDir + "/REVERSED-GROUP=" + group + "/split-0", group, false, true, null, graphPartitioner.getGroupPartitionerForReversedEdges(group).getIthPartitionKey(0), elements, schemaUtils);
        testSplitFileContainsCorrectData(tempFilesDir + "/GROUP=" + group + "/split-1", group, false, false, graphPartitioner.getGroupPartitioner(group).getIthPartitionKey(0), null, elements, schemaUtils);
        testSplitFileContainsCorrectData(tempFilesDir + "/REVERSED-GROUP=" + group + "/split-1", group, false, true, graphPartitioner.getGroupPartitionerForReversedEdges(group).getIthPartitionKey(0), null, elements, schemaUtils);
    }
}
Aggregations