Search in sources :

Example 16 with GrouperMetadata

use of org.apache.samza.container.grouper.task.GrouperMetadata in project samza by apache.

In class TestGroupBySystemStreamPartitionWithGrouperProxy, method testMultipleStreamExpansionWithNewStreams.

/**
 * Two existing streams (PVE, URE) grow from four to eight partitions while a brand-new
 * eight-partition stream (BOB) appears. The grouping returned by the proxy is checked for
 * both the stateful and the stateless configuration.
 */
@Test
public void testMultipleStreamExpansionWithNewStreams() {
    final String taskPrefix = "SystemStreamPartition [kafka, ";
    final String[] expandedStreams = {"PVE", "URE"};

    // Previous grouping: one task per partition, partitions 0..3 of PVE and URE.
    ImmutableMap.Builder<TaskName, List<SystemStreamPartition>> previous = ImmutableMap.builder();
    for (String stream : expandedStreams) {
        for (int partition = 0; partition < 4; partition++) {
            previous.put(
                new TaskName(taskPrefix + stream + ", " + partition + "]"),
                ImmutableList.of(new SystemStreamPartition("kafka", stream, new Partition(partition))));
        }
    }
    Map<TaskName, List<SystemStreamPartition>> prevGroupingWithMultipleStreams = previous.build();

    // Current input: partitions 0..7 of PVE, then BOB, then URE.
    Set<SystemStreamPartition> currSsps = IntStream.range(0, 8)
        .mapToObj(id -> new SystemStreamPartition("kafka", "PVE", new Partition(id)))
        .collect(Collectors.toSet());
    for (String stream : new String[] {"BOB", "URE"}) {
        for (int partition = 0; partition < 8; partition++) {
            currSsps.add(new SystemStreamPartition("kafka", stream, new Partition(partition)));
        }
    }

    ImmutableMap.Builder<TaskName, Set<SystemStreamPartition>> stateful = ImmutableMap.builder();
    ImmutableMap.Builder<TaskName, Set<SystemStreamPartition>> stateless = ImmutableMap.builder();

    // The new stream BOB gets one single-partition task per partition in both expectations.
    for (int partition = 0; partition < 8; partition++) {
        TaskName bobTask = new TaskName(taskPrefix + "BOB, " + partition + "]");
        Set<SystemStreamPartition> bobOnly =
            ImmutableSet.of(new SystemStreamPartition("kafka", "BOB", new Partition(partition)));
        stateful.put(bobTask, bobOnly);
        stateless.put(bobTask, bobOnly);
    }

    for (String stream : expandedStreams) {
        for (int partition = 0; partition < 8; partition++) {
            TaskName task = new TaskName(taskPrefix + stream + ", " + partition + "]");
            SystemStreamPartition ssp = new SystemStreamPartition("kafka", stream, new Partition(partition));
            // Stateless expectation: every partition has its own task.
            stateless.put(task, ImmutableSet.of(ssp));
            // Stateful expectation: the four original tasks also own partition + 4.
            if (partition < 4) {
                stateful.put(task, ImmutableSet.of(
                    ssp, new SystemStreamPartition("kafka", stream, new Partition(partition + 4))));
            }
        }
    }
    Map<TaskName, Set<SystemStreamPartition>> expectedGroupingForStateful = stateful.build();
    Map<TaskName, Set<SystemStreamPartition>> expectedGroupingForStateless = stateless.build();

    GrouperMetadata grouperMetadata =
        new GrouperMetadataImpl(new HashMap<>(), new HashMap<>(), prevGroupingWithMultipleStreams, new HashMap<>());

    // Proxy configured for a stateful job.
    SSPGrouperProxy statefulProxy = buildSspGrouperProxy(true);
    Map<TaskName, Set<SystemStreamPartition>> statefulResult = statefulProxy.group(currSsps, grouperMetadata);
    Assert.assertEquals(expectedGroupingForStateful, statefulResult);

    // Proxy configured for a stateless job.
    SSPGrouperProxy statelessProxy = buildSspGrouperProxy(false);
    Map<TaskName, Set<SystemStreamPartition>> statelessResult = statelessProxy.group(currSsps, grouperMetadata);
    Assert.assertEquals(expectedGroupingForStateless, statelessResult);
}
Also used : IntStream(java.util.stream.IntStream) StorageConfig(org.apache.samza.config.StorageConfig) ImmutableSet(com.google.common.collect.ImmutableSet) TaskName(org.apache.samza.container.TaskName) ImmutableMap(com.google.common.collect.ImmutableMap) Partition(org.apache.samza.Partition) Set(java.util.Set) GrouperMetadataImpl(org.apache.samza.container.grouper.task.GrouperMetadataImpl) HashMap(java.util.HashMap) Test(org.junit.Test) SystemStreamPartition(org.apache.samza.system.SystemStreamPartition) Collectors(java.util.stream.Collectors) List(java.util.List) ImmutableList(com.google.common.collect.ImmutableList) Map(java.util.Map) GrouperMetadata(org.apache.samza.container.grouper.task.GrouperMetadata) Assert(org.junit.Assert) MapConfig(org.apache.samza.config.MapConfig) Partition(org.apache.samza.Partition) SystemStreamPartition(org.apache.samza.system.SystemStreamPartition) ImmutableSet(com.google.common.collect.ImmutableSet) Set(java.util.Set) GrouperMetadataImpl(org.apache.samza.container.grouper.task.GrouperMetadataImpl) TaskName(org.apache.samza.container.TaskName) List(java.util.List) ImmutableList(com.google.common.collect.ImmutableList) GrouperMetadata(org.apache.samza.container.grouper.task.GrouperMetadata) SystemStreamPartition(org.apache.samza.system.SystemStreamPartition) Test(org.junit.Test)

Example 17 with GrouperMetadata

use of org.apache.samza.container.grouper.task.GrouperMetadata in project samza by apache.

In class TestGroupByPartitionWithGrouperProxy, method testMultipleStreamRepartitioningWithNewStreams.

/**
 * Two existing streams (PVE, URE) are repartitioned from four to eight partitions while a new
 * eight-partition stream (BOB) is added. The grouping returned by the proxy is checked for
 * both the stateful and the stateless configuration.
 */
@Test
public void testMultipleStreamRepartitioningWithNewStreams() {
    // Previous grouping: tasks "Partition 0".."Partition 3", each holding that partition of PVE and URE.
    ImmutableMap.Builder<TaskName, List<SystemStreamPartition>> previous = ImmutableMap.builder();
    for (int partition = 0; partition < 4; partition++) {
        previous.put(
            new TaskName("Partition " + partition),
            ImmutableList.of(
                new SystemStreamPartition("kafka", "PVE", new Partition(partition)),
                new SystemStreamPartition("kafka", "URE", new Partition(partition))));
    }
    Map<TaskName, List<SystemStreamPartition>> prevGroupingWithMultipleStreams = previous.build();

    // Current input: partitions 0..7 of PVE, then URE, then BOB.
    Set<SystemStreamPartition> currSsps = new HashSet<>();
    for (String stream : new String[] {"PVE", "URE", "BOB"}) {
        IntStream.range(0, 8)
            .forEach(id -> currSsps.add(new SystemStreamPartition("kafka", stream, new Partition(id))));
    }

    ImmutableMap.Builder<TaskName, Set<SystemStreamPartition>> stateful = ImmutableMap.builder();
    ImmutableMap.Builder<TaskName, Set<SystemStreamPartition>> stateless = ImmutableMap.builder();
    for (int partition = 0; partition < 8; partition++) {
        TaskName task = new TaskName("Partition " + partition);
        SystemStreamPartition pve = new SystemStreamPartition("kafka", "PVE", new Partition(partition));
        SystemStreamPartition ure = new SystemStreamPartition("kafka", "URE", new Partition(partition));
        SystemStreamPartition bob = new SystemStreamPartition("kafka", "BOB", new Partition(partition));

        // Stateless expectation: each task holds the same-numbered partition of all three streams.
        stateless.put(task, ImmutableSet.of(pve, ure, bob));

        if (partition < 4) {
            // Stateful expectation: original tasks also own partition + 4 of PVE and URE.
            stateful.put(task, ImmutableSet.of(
                pve,
                new SystemStreamPartition("kafka", "PVE", new Partition(partition + 4)),
                ure,
                new SystemStreamPartition("kafka", "URE", new Partition(partition + 4)),
                bob));
        } else {
            // Stateful expectation: tasks 4..7 hold only the new BOB partition.
            stateful.put(task, ImmutableSet.of(bob));
        }
    }
    Map<TaskName, Set<SystemStreamPartition>> expectedGroupingForStateful = stateful.build();
    Map<TaskName, Set<SystemStreamPartition>> expectedGroupingForStateless = stateless.build();

    GrouperMetadata grouperMetadata =
        new GrouperMetadataImpl(new HashMap<>(), new HashMap<>(), prevGroupingWithMultipleStreams, new HashMap<>());

    // Proxy configured for a stateful job.
    SSPGrouperProxy statefulProxy = buildSspGrouperProxy(true);
    Map<TaskName, Set<SystemStreamPartition>> statefulResult = statefulProxy.group(currSsps, grouperMetadata);
    Assert.assertEquals(expectedGroupingForStateful, statefulResult);

    // Proxy configured for a stateless job.
    SSPGrouperProxy statelessProxy = buildSspGrouperProxy(false);
    Map<TaskName, Set<SystemStreamPartition>> statelessResult = statelessProxy.group(currSsps, grouperMetadata);
    Assert.assertEquals(expectedGroupingForStateless, statelessResult);
}
Also used : Partition(org.apache.samza.Partition) SystemStreamPartition(org.apache.samza.system.SystemStreamPartition) ImmutableSet(com.google.common.collect.ImmutableSet) Set(java.util.Set) HashSet(java.util.HashSet) GrouperMetadataImpl(org.apache.samza.container.grouper.task.GrouperMetadataImpl) TaskName(org.apache.samza.container.TaskName) List(java.util.List) ImmutableList(com.google.common.collect.ImmutableList) GrouperMetadata(org.apache.samza.container.grouper.task.GrouperMetadata) SystemStreamPartition(org.apache.samza.system.SystemStreamPartition) HashSet(java.util.HashSet) Test(org.junit.Test)

Aggregations

GrouperMetadata (org.apache.samza.container.grouper.task.GrouperMetadata)17 GrouperMetadataImpl (org.apache.samza.container.grouper.task.GrouperMetadataImpl)14 List (java.util.List)12 Set (java.util.Set)12 TaskName (org.apache.samza.container.TaskName)12 SystemStreamPartition (org.apache.samza.system.SystemStreamPartition)12 HashMap (java.util.HashMap)11 Map (java.util.Map)11 Collectors (java.util.stream.Collectors)11 ImmutableList (com.google.common.collect.ImmutableList)10 ImmutableSet (com.google.common.collect.ImmutableSet)10 Partition (org.apache.samza.Partition)10 MapConfig (org.apache.samza.config.MapConfig)10 Test (org.junit.Test)10 ImmutableMap (com.google.common.collect.ImmutableMap)9 IntStream (java.util.stream.IntStream)9 StorageConfig (org.apache.samza.config.StorageConfig)9 Assert (org.junit.Assert)9 HashSet (java.util.HashSet)7 JobModel (org.apache.samza.job.model.JobModel)5