Search in sources :

Example 1 with GrouperMetadata

use of org.apache.samza.container.grouper.task.GrouperMetadata in project samza by apache.

the class TestGroupBySystemStreamPartitionWithGrouperProxy method testMultipleStreamsWithSingleStreamExpansionAndNewStream.

@Test
public void testMultipleStreamsWithSingleStreamExpansionAndNewStream() {
    Map<TaskName, List<SystemStreamPartition>> prevGroupingWithMultipleStreams = ImmutableMap.<TaskName, List<SystemStreamPartition>>builder().put(new TaskName("SystemStreamPartition [kafka, PVE, 0]"), ImmutableList.of(new SystemStreamPartition("kafka", "PVE", new Partition(0)))).put(new TaskName("SystemStreamPartition [kafka, PVE, 1]"), ImmutableList.of(new SystemStreamPartition("kafka", "PVE", new Partition(1)))).put(new TaskName("SystemStreamPartition [kafka, PVE, 2]"), ImmutableList.of(new SystemStreamPartition("kafka", "PVE", new Partition(2)))).put(new TaskName("SystemStreamPartition [kafka, PVE, 3]"), ImmutableList.of(new SystemStreamPartition("kafka", "PVE", new Partition(3)))).put(new TaskName("SystemStreamPartition [kafka, URE, 0]"), ImmutableList.of(new SystemStreamPartition("kafka", "URE", new Partition(0)))).put(new TaskName("SystemStreamPartition [kafka, URE, 1]"), ImmutableList.of(new SystemStreamPartition("kafka", "URE", new Partition(1)))).put(new TaskName("SystemStreamPartition [kafka, URE, 2]"), ImmutableList.of(new SystemStreamPartition("kafka", "URE", new Partition(2)))).put(new TaskName("SystemStreamPartition [kafka, URE, 3]"), ImmutableList.of(new SystemStreamPartition("kafka", "URE", new Partition(3)))).build();
    Set<SystemStreamPartition> currSsps = IntStream.range(0, 8).mapToObj(partitionId -> new SystemStreamPartition("kafka", "PVE", new Partition(partitionId))).collect(Collectors.toSet());
    IntStream.range(0, 8).forEach(partitionId -> currSsps.add(new SystemStreamPartition("kafka", "BOB", new Partition(partitionId))));
    IntStream.range(0, 4).forEach(partitionId -> currSsps.add(new SystemStreamPartition("kafka", "URE", new Partition(partitionId))));
    Map<TaskName, Set<SystemStreamPartition>> expectedGroupingForStateful = ImmutableMap.<TaskName, Set<SystemStreamPartition>>builder().put(new TaskName("SystemStreamPartition [kafka, BOB, 2]"), ImmutableSet.of(new SystemStreamPartition("kafka", "BOB", new Partition(2)))).put(new TaskName("SystemStreamPartition [kafka, BOB, 1]"), ImmutableSet.of(new SystemStreamPartition("kafka", "BOB", new Partition(1)))).put(new TaskName("SystemStreamPartition [kafka, BOB, 0]"), ImmutableSet.of(new SystemStreamPartition("kafka", "BOB", new Partition(0)))).put(new TaskName("SystemStreamPartition [kafka, BOB, 7]"), ImmutableSet.of(new SystemStreamPartition("kafka", "BOB", new Partition(7)))).put(new TaskName("SystemStreamPartition [kafka, BOB, 6]"), ImmutableSet.of(new SystemStreamPartition("kafka", "BOB", new Partition(6)))).put(new TaskName("SystemStreamPartition [kafka, BOB, 4]"), ImmutableSet.of(new SystemStreamPartition("kafka", "BOB", new Partition(4)))).put(new TaskName("SystemStreamPartition [kafka, BOB, 5]"), ImmutableSet.of(new SystemStreamPartition("kafka", "BOB", new Partition(5)))).put(new TaskName("SystemStreamPartition [kafka, BOB, 3]"), ImmutableSet.of(new SystemStreamPartition("kafka", "BOB", new Partition(3)))).put(new TaskName("SystemStreamPartition [kafka, PVE, 1]"), ImmutableSet.of(new SystemStreamPartition("kafka", "PVE", new Partition(1)), new SystemStreamPartition("kafka", "PVE", new Partition(5)))).put(new TaskName("SystemStreamPartition [kafka, PVE, 2]"), ImmutableSet.of(new SystemStreamPartition("kafka", "PVE", new Partition(2)), new SystemStreamPartition("kafka", "PVE", new Partition(6)))).put(new TaskName("SystemStreamPartition [kafka, PVE, 3]"), ImmutableSet.of(new SystemStreamPartition("kafka", "PVE", new Partition(3)), new SystemStreamPartition("kafka", "PVE", new Partition(7)))).put(new TaskName("SystemStreamPartition [kafka, PVE, 0]"), ImmutableSet.of(new SystemStreamPartition("kafka", "PVE", new Partition(0)), new SystemStreamPartition("kafka", "PVE", new Partition(4)))).put(new TaskName("SystemStreamPartition [kafka, URE, 3]"), ImmutableSet.of(new SystemStreamPartition("kafka", "URE", new Partition(3)))).put(new TaskName("SystemStreamPartition [kafka, URE, 2]"), ImmutableSet.of(new SystemStreamPartition("kafka", "URE", new Partition(2)))).put(new TaskName("SystemStreamPartition [kafka, URE, 1]"), ImmutableSet.of(new SystemStreamPartition("kafka", "URE", new Partition(1)))).put(new TaskName("SystemStreamPartition [kafka, URE, 0]"), ImmutableSet.of(new SystemStreamPartition("kafka", "URE", new Partition(0)))).build();
    Map<TaskName, Set<SystemStreamPartition>> expectedGroupingForStateless = ImmutableMap.<TaskName, Set<SystemStreamPartition>>builder().put(new TaskName("SystemStreamPartition [kafka, BOB, 2]"), ImmutableSet.of(new SystemStreamPartition("kafka", "BOB", new Partition(2)))).put(new TaskName("SystemStreamPartition [kafka, BOB, 1]"), ImmutableSet.of(new SystemStreamPartition("kafka", "BOB", new Partition(1)))).put(new TaskName("SystemStreamPartition [kafka, BOB, 0]"), ImmutableSet.of(new SystemStreamPartition("kafka", "BOB", new Partition(0)))).put(new TaskName("SystemStreamPartition [kafka, BOB, 7]"), ImmutableSet.of(new SystemStreamPartition("kafka", "BOB", new Partition(7)))).put(new TaskName("SystemStreamPartition [kafka, BOB, 6]"), ImmutableSet.of(new SystemStreamPartition("kafka", "BOB", new Partition(6)))).put(new TaskName("SystemStreamPartition [kafka, BOB, 4]"), ImmutableSet.of(new SystemStreamPartition("kafka", "BOB", new Partition(4)))).put(new TaskName("SystemStreamPartition [kafka, BOB, 5]"), ImmutableSet.of(new SystemStreamPartition("kafka", "BOB", new Partition(5)))).put(new TaskName("SystemStreamPartition [kafka, BOB, 3]"), ImmutableSet.of(new SystemStreamPartition("kafka", "BOB", new Partition(3)))).put(new TaskName("SystemStreamPartition [kafka, PVE, 1]"), ImmutableSet.of(new SystemStreamPartition("kafka", "PVE", new Partition(1)))).put(new TaskName("SystemStreamPartition [kafka, PVE, 2]"), ImmutableSet.of(new SystemStreamPartition("kafka", "PVE", new Partition(2)))).put(new TaskName("SystemStreamPartition [kafka, PVE, 3]"), ImmutableSet.of(new SystemStreamPartition("kafka", "PVE", new Partition(3)))).put(new TaskName("SystemStreamPartition [kafka, PVE, 0]"), ImmutableSet.of(new SystemStreamPartition("kafka", "PVE", new Partition(0)))).put(new TaskName("SystemStreamPartition [kafka, PVE, 4]"), ImmutableSet.of(new SystemStreamPartition("kafka", "PVE", new Partition(4)))).put(new TaskName("SystemStreamPartition [kafka, PVE, 5]"), ImmutableSet.of(new SystemStreamPartition("kafka", "PVE", new Partition(5)))).put(new TaskName("SystemStreamPartition [kafka, PVE, 6]"), ImmutableSet.of(new SystemStreamPartition("kafka", "PVE", new Partition(6)))).put(new TaskName("SystemStreamPartition [kafka, PVE, 7]"), ImmutableSet.of(new SystemStreamPartition("kafka", "PVE", new Partition(7)))).put(new TaskName("SystemStreamPartition [kafka, URE, 3]"), ImmutableSet.of(new SystemStreamPartition("kafka", "URE", new Partition(3)))).put(new TaskName("SystemStreamPartition [kafka, URE, 2]"), ImmutableSet.of(new SystemStreamPartition("kafka", "URE", new Partition(2)))).put(new TaskName("SystemStreamPartition [kafka, URE, 1]"), ImmutableSet.of(new SystemStreamPartition("kafka", "URE", new Partition(1)))).put(new TaskName("SystemStreamPartition [kafka, URE, 0]"), ImmutableSet.of(new SystemStreamPartition("kafka", "URE", new Partition(0)))).build();
    // SSPGrouperProxy for stateful job
    SSPGrouperProxy groupBySystemStreamPartition = buildSspGrouperProxy(true);
    GrouperMetadata grouperMetadata = new GrouperMetadataImpl(new HashMap<>(), new HashMap<>(), prevGroupingWithMultipleStreams, new HashMap<>());
    Map<TaskName, Set<SystemStreamPartition>> finalGrouping = groupBySystemStreamPartition.group(currSsps, grouperMetadata);
    Assert.assertEquals(expectedGroupingForStateful, finalGrouping);
    // SSPGrouperProxy for stateless job
    groupBySystemStreamPartition = buildSspGrouperProxy(false);
    finalGrouping = groupBySystemStreamPartition.group(currSsps, grouperMetadata);
    Assert.assertEquals(expectedGroupingForStateless, finalGrouping);
}
Also used : IntStream(java.util.stream.IntStream) StorageConfig(org.apache.samza.config.StorageConfig) ImmutableSet(com.google.common.collect.ImmutableSet) TaskName(org.apache.samza.container.TaskName) ImmutableMap(com.google.common.collect.ImmutableMap) Partition(org.apache.samza.Partition) Set(java.util.Set) GrouperMetadataImpl(org.apache.samza.container.grouper.task.GrouperMetadataImpl) HashMap(java.util.HashMap) Test(org.junit.Test) SystemStreamPartition(org.apache.samza.system.SystemStreamPartition) Collectors(java.util.stream.Collectors) List(java.util.List) ImmutableList(com.google.common.collect.ImmutableList) Map(java.util.Map) GrouperMetadata(org.apache.samza.container.grouper.task.GrouperMetadata) Assert(org.junit.Assert) MapConfig(org.apache.samza.config.MapConfig) Partition(org.apache.samza.Partition) SystemStreamPartition(org.apache.samza.system.SystemStreamPartition) ImmutableSet(com.google.common.collect.ImmutableSet) Set(java.util.Set) GrouperMetadataImpl(org.apache.samza.container.grouper.task.GrouperMetadataImpl) TaskName(org.apache.samza.container.TaskName) List(java.util.List) ImmutableList(com.google.common.collect.ImmutableList) GrouperMetadata(org.apache.samza.container.grouper.task.GrouperMetadata) SystemStreamPartition(org.apache.samza.system.SystemStreamPartition) Test(org.junit.Test)

Example 2 with GrouperMetadata

use of org.apache.samza.container.grouper.task.GrouperMetadata in project samza by apache.

the class TestGroupBySystemStreamPartitionWithGrouperProxy method testRemovalAndAdditionOfStreamsWithExpansion.

@Test
public void testRemovalAndAdditionOfStreamsWithExpansion() {
    Map<TaskName, List<SystemStreamPartition>> prevGroupingWithMultipleStreams = ImmutableMap.<TaskName, List<SystemStreamPartition>>builder().put(new TaskName("SystemStreamPartition [kafka, PVE, 0]"), ImmutableList.of(new SystemStreamPartition("kafka", "PVE", new Partition(0)))).put(new TaskName("SystemStreamPartition [kafka, PVE, 1]"), ImmutableList.of(new SystemStreamPartition("kafka", "PVE", new Partition(1)))).put(new TaskName("SystemStreamPartition [kafka, PVE, 2]"), ImmutableList.of(new SystemStreamPartition("kafka", "PVE", new Partition(2)))).put(new TaskName("SystemStreamPartition [kafka, PVE, 3]"), ImmutableList.of(new SystemStreamPartition("kafka", "PVE", new Partition(3)))).put(new TaskName("SystemStreamPartition [kafka, URE, 0]"), ImmutableList.of(new SystemStreamPartition("kafka", "URE", new Partition(0)))).put(new TaskName("SystemStreamPartition [kafka, URE, 1]"), ImmutableList.of(new SystemStreamPartition("kafka", "URE", new Partition(1)))).put(new TaskName("SystemStreamPartition [kafka, URE, 2]"), ImmutableList.of(new SystemStreamPartition("kafka", "URE", new Partition(2)))).put(new TaskName("SystemStreamPartition [kafka, URE, 3]"), ImmutableList.of(new SystemStreamPartition("kafka", "URE", new Partition(3)))).build();
    Set<SystemStreamPartition> currSsps = IntStream.range(0, 8).mapToObj(partitionId -> new SystemStreamPartition("kafka", "PVE", new Partition(partitionId))).collect(Collectors.toSet());
    IntStream.range(0, 8).forEach(partitionId -> currSsps.add(new SystemStreamPartition("kafka", "BOB", new Partition(partitionId))));
    Map<TaskName, Set<SystemStreamPartition>> expectedGroupingForStateful = ImmutableMap.<TaskName, Set<SystemStreamPartition>>builder().put(new TaskName("SystemStreamPartition [kafka, PVE, 1]"), ImmutableSet.of(new SystemStreamPartition("kafka", "PVE", new Partition(5)), new SystemStreamPartition("kafka", "PVE", new Partition(1)))).put(new TaskName("SystemStreamPartition [kafka, PVE, 2]"), ImmutableSet.of(new SystemStreamPartition("kafka", "PVE", new Partition(6)), new SystemStreamPartition("kafka", "PVE", new Partition(2)))).put(new TaskName("SystemStreamPartition [kafka, PVE, 3]"), ImmutableSet.of(new SystemStreamPartition("kafka", "PVE", new Partition(7)), new SystemStreamPartition("kafka", "PVE", new Partition(3)))).put(new TaskName("SystemStreamPartition [kafka, PVE, 0]"), ImmutableSet.of(new SystemStreamPartition("kafka", "PVE", new Partition(4)), new SystemStreamPartition("kafka", "PVE", new Partition(0)))).put(new TaskName("SystemStreamPartition [kafka, BOB, 7]"), ImmutableSet.of(new SystemStreamPartition("kafka", "BOB", new Partition(7)))).put(new TaskName("SystemStreamPartition [kafka, BOB, 6]"), ImmutableSet.of(new SystemStreamPartition("kafka", "BOB", new Partition(6)))).put(new TaskName("SystemStreamPartition [kafka, BOB, 5]"), ImmutableSet.of(new SystemStreamPartition("kafka", "BOB", new Partition(5)))).put(new TaskName("SystemStreamPartition [kafka, BOB, 4]"), ImmutableSet.of(new SystemStreamPartition("kafka", "BOB", new Partition(4)))).put(new TaskName("SystemStreamPartition [kafka, BOB, 3]"), ImmutableSet.of(new SystemStreamPartition("kafka", "BOB", new Partition(3)))).put(new TaskName("SystemStreamPartition [kafka, BOB, 2]"), ImmutableSet.of(new SystemStreamPartition("kafka", "BOB", new Partition(2)))).put(new TaskName("SystemStreamPartition [kafka, BOB, 1]"), ImmutableSet.of(new SystemStreamPartition("kafka", "BOB", new Partition(1)))).put(new TaskName("SystemStreamPartition [kafka, BOB, 0]"), ImmutableSet.of(new SystemStreamPartition("kafka", "BOB", new Partition(0)))).build();
    Map<TaskName, Set<SystemStreamPartition>> expectedGroupingForStateless = ImmutableMap.<TaskName, Set<SystemStreamPartition>>builder().put(new TaskName("SystemStreamPartition [kafka, PVE, 1]"), ImmutableSet.of(new SystemStreamPartition("kafka", "PVE", new Partition(1)))).put(new TaskName("SystemStreamPartition [kafka, PVE, 2]"), ImmutableSet.of(new SystemStreamPartition("kafka", "PVE", new Partition(2)))).put(new TaskName("SystemStreamPartition [kafka, PVE, 3]"), ImmutableSet.of(new SystemStreamPartition("kafka", "PVE", new Partition(3)))).put(new TaskName("SystemStreamPartition [kafka, PVE, 0]"), ImmutableSet.of(new SystemStreamPartition("kafka", "PVE", new Partition(0)))).put(new TaskName("SystemStreamPartition [kafka, PVE, 4]"), ImmutableSet.of(new SystemStreamPartition("kafka", "PVE", new Partition(4)))).put(new TaskName("SystemStreamPartition [kafka, PVE, 5]"), ImmutableSet.of(new SystemStreamPartition("kafka", "PVE", new Partition(5)))).put(new TaskName("SystemStreamPartition [kafka, PVE, 6]"), ImmutableSet.of(new SystemStreamPartition("kafka", "PVE", new Partition(6)))).put(new TaskName("SystemStreamPartition [kafka, PVE, 7]"), ImmutableSet.of(new SystemStreamPartition("kafka", "PVE", new Partition(7)))).put(new TaskName("SystemStreamPartition [kafka, BOB, 7]"), ImmutableSet.of(new SystemStreamPartition("kafka", "BOB", new Partition(7)))).put(new TaskName("SystemStreamPartition [kafka, BOB, 6]"), ImmutableSet.of(new SystemStreamPartition("kafka", "BOB", new Partition(6)))).put(new TaskName("SystemStreamPartition [kafka, BOB, 5]"), ImmutableSet.of(new SystemStreamPartition("kafka", "BOB", new Partition(5)))).put(new TaskName("SystemStreamPartition [kafka, BOB, 4]"), ImmutableSet.of(new SystemStreamPartition("kafka", "BOB", new Partition(4)))).put(new TaskName("SystemStreamPartition [kafka, BOB, 3]"), ImmutableSet.of(new SystemStreamPartition("kafka", "BOB", new Partition(3)))).put(new TaskName("SystemStreamPartition [kafka, BOB, 2]"), ImmutableSet.of(new SystemStreamPartition("kafka", "BOB", new Partition(2)))).put(new TaskName("SystemStreamPartition [kafka, BOB, 1]"), ImmutableSet.of(new SystemStreamPartition("kafka", "BOB", new Partition(1)))).put(new TaskName("SystemStreamPartition [kafka, BOB, 0]"), ImmutableSet.of(new SystemStreamPartition("kafka", "BOB", new Partition(0)))).build();
    // SSPGrouperProxy for stateful job
    SSPGrouperProxy groupBySystemStreamPartition = buildSspGrouperProxy(true);
    GrouperMetadata grouperMetadata = new GrouperMetadataImpl(new HashMap<>(), new HashMap<>(), prevGroupingWithMultipleStreams, new HashMap<>());
    Map<TaskName, Set<SystemStreamPartition>> finalGrouping = groupBySystemStreamPartition.group(currSsps, grouperMetadata);
    Assert.assertEquals(expectedGroupingForStateful, finalGrouping);
    // SSPGrouperProxy for stateless job
    groupBySystemStreamPartition = buildSspGrouperProxy(false);
    finalGrouping = groupBySystemStreamPartition.group(currSsps, grouperMetadata);
    Assert.assertEquals(expectedGroupingForStateless, finalGrouping);
}
Also used : IntStream(java.util.stream.IntStream) StorageConfig(org.apache.samza.config.StorageConfig) ImmutableSet(com.google.common.collect.ImmutableSet) TaskName(org.apache.samza.container.TaskName) ImmutableMap(com.google.common.collect.ImmutableMap) Partition(org.apache.samza.Partition) Set(java.util.Set) GrouperMetadataImpl(org.apache.samza.container.grouper.task.GrouperMetadataImpl) HashMap(java.util.HashMap) Test(org.junit.Test) SystemStreamPartition(org.apache.samza.system.SystemStreamPartition) Collectors(java.util.stream.Collectors) List(java.util.List) ImmutableList(com.google.common.collect.ImmutableList) Map(java.util.Map) GrouperMetadata(org.apache.samza.container.grouper.task.GrouperMetadata) Assert(org.junit.Assert) MapConfig(org.apache.samza.config.MapConfig) Partition(org.apache.samza.Partition) SystemStreamPartition(org.apache.samza.system.SystemStreamPartition) ImmutableSet(com.google.common.collect.ImmutableSet) Set(java.util.Set) GrouperMetadataImpl(org.apache.samza.container.grouper.task.GrouperMetadataImpl) TaskName(org.apache.samza.container.TaskName) List(java.util.List) ImmutableList(com.google.common.collect.ImmutableList) GrouperMetadata(org.apache.samza.container.grouper.task.GrouperMetadata) SystemStreamPartition(org.apache.samza.system.SystemStreamPartition) Test(org.junit.Test)

Example 3 with GrouperMetadata

use of org.apache.samza.container.grouper.task.GrouperMetadata in project samza by apache.

the class TestGroupByPartitionWithGrouperProxy method testRemovalOfPreviousStreamAndThenAddNewStream.

@Test
public void testRemovalOfPreviousStreamAndThenAddNewStream() {
    Map<TaskName, List<SystemStreamPartition>> prevGroupingWithMultipleStreams = ImmutableMap.<TaskName, List<SystemStreamPartition>>builder().put(new TaskName("Partition 0"), ImmutableList.of(new SystemStreamPartition("kafka", "PVE", new Partition(0)), new SystemStreamPartition("kafka", "URE", new Partition(0)))).put(new TaskName("Partition 1"), ImmutableList.of(new SystemStreamPartition("kafka", "PVE", new Partition(1)), new SystemStreamPartition("kafka", "URE", new Partition(1)))).put(new TaskName("Partition 2"), ImmutableList.of(new SystemStreamPartition("kafka", "PVE", new Partition(2)), new SystemStreamPartition("kafka", "URE", new Partition(2)))).put(new TaskName("Partition 3"), ImmutableList.of(new SystemStreamPartition("kafka", "PVE", new Partition(3)), new SystemStreamPartition("kafka", "URE", new Partition(3)))).build();
    Set<SystemStreamPartition> currSsps = IntStream.range(0, 8).mapToObj(partitionId -> new SystemStreamPartition("kafka", "BOB", new Partition(partitionId))).collect(Collectors.toSet());
    // expected Grouping
    Map<TaskName, Set<SystemStreamPartition>> expectedGroupingForStatefulAndStateless = ImmutableMap.<TaskName, Set<SystemStreamPartition>>builder().put(new TaskName("Partition 5"), ImmutableSet.of(new SystemStreamPartition("kafka", "BOB", new Partition(5)))).put(new TaskName("Partition 4"), ImmutableSet.of(new SystemStreamPartition("kafka", "BOB", new Partition(4)))).put(new TaskName("Partition 7"), ImmutableSet.of(new SystemStreamPartition("kafka", "BOB", new Partition(7)))).put(new TaskName("Partition 6"), ImmutableSet.of(new SystemStreamPartition("kafka", "BOB", new Partition(6)))).put(new TaskName("Partition 0"), ImmutableSet.of(new SystemStreamPartition("kafka", "BOB", new Partition(0)))).put(new TaskName("Partition 1"), ImmutableSet.of(new SystemStreamPartition("kafka", "BOB", new Partition(1)))).put(new TaskName("Partition 2"), ImmutableSet.of(new SystemStreamPartition("kafka", "BOB", new Partition(2)))).put(new TaskName("Partition 3"), ImmutableSet.of(new SystemStreamPartition("kafka", "BOB", new Partition(3)))).build();
    // SSPGrouperProxy for stateful job
    SSPGrouperProxy groupByPartition = buildSspGrouperProxy(true);
    GrouperMetadata grouperMetadata = new GrouperMetadataImpl(new HashMap<>(), new HashMap<>(), prevGroupingWithMultipleStreams, new HashMap<>());
    Map<TaskName, Set<SystemStreamPartition>> finalGrouping = groupByPartition.group(currSsps, grouperMetadata);
    Assert.assertEquals(expectedGroupingForStatefulAndStateless, finalGrouping);
    // SSPGrouperProxy for stateless job
    groupByPartition = buildSspGrouperProxy(false);
    finalGrouping = groupByPartition.group(currSsps, grouperMetadata);
    Assert.assertEquals(expectedGroupingForStatefulAndStateless, finalGrouping);
}
Also used : IntStream(java.util.stream.IntStream) StorageConfig(org.apache.samza.config.StorageConfig) ImmutableSet(com.google.common.collect.ImmutableSet) TaskName(org.apache.samza.container.TaskName) ImmutableMap(com.google.common.collect.ImmutableMap) Partition(org.apache.samza.Partition) Set(java.util.Set) GrouperMetadataImpl(org.apache.samza.container.grouper.task.GrouperMetadataImpl) HashMap(java.util.HashMap) Test(org.junit.Test) SystemStreamPartition(org.apache.samza.system.SystemStreamPartition) Collectors(java.util.stream.Collectors) HashSet(java.util.HashSet) List(java.util.List) ImmutableList(com.google.common.collect.ImmutableList) Map(java.util.Map) GrouperMetadata(org.apache.samza.container.grouper.task.GrouperMetadata) Assert(org.junit.Assert) MapConfig(org.apache.samza.config.MapConfig) Partition(org.apache.samza.Partition) SystemStreamPartition(org.apache.samza.system.SystemStreamPartition) ImmutableSet(com.google.common.collect.ImmutableSet) Set(java.util.Set) HashSet(java.util.HashSet) GrouperMetadataImpl(org.apache.samza.container.grouper.task.GrouperMetadataImpl) TaskName(org.apache.samza.container.TaskName) List(java.util.List) ImmutableList(com.google.common.collect.ImmutableList) GrouperMetadata(org.apache.samza.container.grouper.task.GrouperMetadata) SystemStreamPartition(org.apache.samza.system.SystemStreamPartition) Test(org.junit.Test)

Example 4 with GrouperMetadata

use of org.apache.samza.container.grouper.task.GrouperMetadata in project samza by apache.

the class TestGroupByPartitionWithGrouperProxy method testMultipleStreamsWithSingleStreamRepartitioning.

@Test
public void testMultipleStreamsWithSingleStreamRepartitioning() {
    Map<TaskName, List<SystemStreamPartition>> prevGroupingWithMultipleStreams = ImmutableMap.<TaskName, List<SystemStreamPartition>>builder().put(new TaskName("Partition 0"), ImmutableList.of(new SystemStreamPartition("kafka", "PVE", new Partition(0)), new SystemStreamPartition("kafka", "URE", new Partition(0)))).put(new TaskName("Partition 1"), ImmutableList.of(new SystemStreamPartition("kafka", "PVE", new Partition(1)), new SystemStreamPartition("kafka", "URE", new Partition(1)))).put(new TaskName("Partition 2"), ImmutableList.of(new SystemStreamPartition("kafka", "PVE", new Partition(2)), new SystemStreamPartition("kafka", "URE", new Partition(2)))).put(new TaskName("Partition 3"), ImmutableList.of(new SystemStreamPartition("kafka", "PVE", new Partition(3)), new SystemStreamPartition("kafka", "URE", new Partition(3)))).build();
    Set<SystemStreamPartition> currSsps = IntStream.range(0, 8).mapToObj(partitionId -> new SystemStreamPartition("kafka", "PVE", new Partition(partitionId))).collect(Collectors.toSet());
    IntStream.range(0, 4).forEach(partitionId -> currSsps.add(new SystemStreamPartition("kafka", "URE", new Partition(partitionId))));
    // New stream added to previous streams
    IntStream.range(0, 8).forEach(partitionId -> currSsps.add(new SystemStreamPartition("kafka", "BOB", new Partition(partitionId))));
    Map<TaskName, Set<SystemStreamPartition>> expectedGroupingForStateful = ImmutableMap.<TaskName, Set<SystemStreamPartition>>builder().put(new TaskName("Partition 1"), ImmutableSet.of(new SystemStreamPartition("kafka", "PVE", new Partition(1)), new SystemStreamPartition("kafka", "PVE", new Partition(5)), new SystemStreamPartition("kafka", "URE", new Partition(1)), new SystemStreamPartition("kafka", "BOB", new Partition(1)))).put(new TaskName("Partition 0"), ImmutableSet.of(new SystemStreamPartition("kafka", "PVE", new Partition(0)), new SystemStreamPartition("kafka", "PVE", new Partition(4)), new SystemStreamPartition("kafka", "URE", new Partition(0)), new SystemStreamPartition("kafka", "BOB", new Partition(0)))).put(new TaskName("Partition 3"), ImmutableSet.of(new SystemStreamPartition("kafka", "PVE", new Partition(7)), new SystemStreamPartition("kafka", "PVE", new Partition(3)), new SystemStreamPartition("kafka", "URE", new Partition(3)), new SystemStreamPartition("kafka", "BOB", new Partition(3)))).put(new TaskName("Partition 2"), ImmutableSet.of(new SystemStreamPartition("kafka", "PVE", new Partition(2)), new SystemStreamPartition("kafka", "PVE", new Partition(6)), new SystemStreamPartition("kafka", "URE", new Partition(2)), new SystemStreamPartition("kafka", "BOB", new Partition(2)))).put(new TaskName("Partition 5"), ImmutableSet.of(new SystemStreamPartition("kafka", "BOB", new Partition(5)))).put(new TaskName("Partition 4"), ImmutableSet.of(new SystemStreamPartition("kafka", "BOB", new Partition(4)))).put(new TaskName("Partition 7"), ImmutableSet.of(new SystemStreamPartition("kafka", "BOB", new Partition(7)))).put(new TaskName("Partition 6"), ImmutableSet.of(new SystemStreamPartition("kafka", "BOB", new Partition(6)))).build();
    Map<TaskName, Set<SystemStreamPartition>> expectedGroupingForStateless = ImmutableMap.<TaskName, Set<SystemStreamPartition>>builder().put(new TaskName("Partition 0"), ImmutableSet.of(new SystemStreamPartition("kafka", "PVE", new Partition(0)), new SystemStreamPartition("kafka", "URE", new Partition(0)), new SystemStreamPartition("kafka", "BOB", new Partition(0)))).put(new TaskName("Partition 1"), ImmutableSet.of(new SystemStreamPartition("kafka", "PVE", new Partition(1)), new SystemStreamPartition("kafka", "URE", new Partition(1)), new SystemStreamPartition("kafka", "BOB", new Partition(1)))).put(new TaskName("Partition 2"), ImmutableSet.of(new SystemStreamPartition("kafka", "PVE", new Partition(2)), new SystemStreamPartition("kafka", "URE", new Partition(2)), new SystemStreamPartition("kafka", "BOB", new Partition(2)))).put(new TaskName("Partition 3"), ImmutableSet.of(new SystemStreamPartition("kafka", "PVE", new Partition(3)), new SystemStreamPartition("kafka", "URE", new Partition(3)), new SystemStreamPartition("kafka", "BOB", new Partition(3)))).put(new TaskName("Partition 4"), ImmutableSet.of(new SystemStreamPartition("kafka", "PVE", new Partition(4)), new SystemStreamPartition("kafka", "BOB", new Partition(4)))).put(new TaskName("Partition 5"), ImmutableSet.of(new SystemStreamPartition("kafka", "PVE", new Partition(5)), new SystemStreamPartition("kafka", "BOB", new Partition(5)))).put(new TaskName("Partition 6"), ImmutableSet.of(new SystemStreamPartition("kafka", "PVE", new Partition(6)), new SystemStreamPartition("kafka", "BOB", new Partition(6)))).put(new TaskName("Partition 7"), ImmutableSet.of(new SystemStreamPartition("kafka", "PVE", new Partition(7)), new SystemStreamPartition("kafka", "BOB", new Partition(7)))).build();
    // SSPGrouperProxy for stateful job
    SSPGrouperProxy groupByPartition = buildSspGrouperProxy(true);
    GrouperMetadata grouperMetadata = new GrouperMetadataImpl(new HashMap<>(), new HashMap<>(), prevGroupingWithMultipleStreams, new HashMap<>());
    Map<TaskName, Set<SystemStreamPartition>> finalGrouping = groupByPartition.group(currSsps, grouperMetadata);
    Assert.assertEquals(expectedGroupingForStateful, finalGrouping);
    // SSPGrouperProxy for stateless job
    groupByPartition = buildSspGrouperProxy(false);
    finalGrouping = groupByPartition.group(currSsps, grouperMetadata);
    Assert.assertEquals(expectedGroupingForStateless, finalGrouping);
}
Also used : IntStream(java.util.stream.IntStream) StorageConfig(org.apache.samza.config.StorageConfig) ImmutableSet(com.google.common.collect.ImmutableSet) TaskName(org.apache.samza.container.TaskName) ImmutableMap(com.google.common.collect.ImmutableMap) Partition(org.apache.samza.Partition) Set(java.util.Set) GrouperMetadataImpl(org.apache.samza.container.grouper.task.GrouperMetadataImpl) HashMap(java.util.HashMap) Test(org.junit.Test) SystemStreamPartition(org.apache.samza.system.SystemStreamPartition) Collectors(java.util.stream.Collectors) HashSet(java.util.HashSet) List(java.util.List) ImmutableList(com.google.common.collect.ImmutableList) Map(java.util.Map) GrouperMetadata(org.apache.samza.container.grouper.task.GrouperMetadata) Assert(org.junit.Assert) MapConfig(org.apache.samza.config.MapConfig) Partition(org.apache.samza.Partition) SystemStreamPartition(org.apache.samza.system.SystemStreamPartition) ImmutableSet(com.google.common.collect.ImmutableSet) Set(java.util.Set) HashSet(java.util.HashSet) GrouperMetadataImpl(org.apache.samza.container.grouper.task.GrouperMetadataImpl) TaskName(org.apache.samza.container.TaskName) List(java.util.List) ImmutableList(com.google.common.collect.ImmutableList) GrouperMetadata(org.apache.samza.container.grouper.task.GrouperMetadata) SystemStreamPartition(org.apache.samza.system.SystemStreamPartition) Test(org.junit.Test)

Example 5 with GrouperMetadata

use of org.apache.samza.container.grouper.task.GrouperMetadata in project samza by apache.

the class TestGroupByPartitionWithGrouperProxy method testRemovalAndAdditionOfStreamsWithExpansion.

@Test
public void testRemovalAndAdditionOfStreamsWithExpansion() {
    Map<TaskName, List<SystemStreamPartition>> prevGroupingWithMultipleStreams = ImmutableMap.<TaskName, List<SystemStreamPartition>>builder().put(new TaskName("Partition 0"), ImmutableList.of(new SystemStreamPartition("kafka", "PVE", new Partition(0)), new SystemStreamPartition("kafka", "URE", new Partition(0)))).put(new TaskName("Partition 1"), ImmutableList.of(new SystemStreamPartition("kafka", "PVE", new Partition(1)), new SystemStreamPartition("kafka", "URE", new Partition(1)))).put(new TaskName("Partition 2"), ImmutableList.of(new SystemStreamPartition("kafka", "PVE", new Partition(2)), new SystemStreamPartition("kafka", "URE", new Partition(2)))).put(new TaskName("Partition 3"), ImmutableList.of(new SystemStreamPartition("kafka", "PVE", new Partition(3)), new SystemStreamPartition("kafka", "URE", new Partition(3)))).build();
    Set<SystemStreamPartition> currSsps = IntStream.range(0, 8).mapToObj(partitionId -> new SystemStreamPartition("kafka", "BOB", new Partition(partitionId))).collect(Collectors.toSet());
    IntStream.range(0, 8).forEach(partitionId -> currSsps.add(new SystemStreamPartition("kafka", "PVE", new Partition(partitionId))));
    // expected grouping
    Map<TaskName, Set<SystemStreamPartition>> expectedGroupingForStateful = ImmutableMap.<TaskName, Set<SystemStreamPartition>>builder().put(new TaskName("Partition 1"), ImmutableSet.of(new SystemStreamPartition("kafka", "PVE", new Partition(1)), new SystemStreamPartition("kafka", "PVE", new Partition(5)), new SystemStreamPartition("kafka", "BOB", new Partition(1)))).put(new TaskName("Partition 0"), ImmutableSet.of(new SystemStreamPartition("kafka", "PVE", new Partition(0)), new SystemStreamPartition("kafka", "PVE", new Partition(4)), new SystemStreamPartition("kafka", "BOB", new Partition(0)))).put(new TaskName("Partition 3"), ImmutableSet.of(new SystemStreamPartition("kafka", "PVE", new Partition(7)), new SystemStreamPartition("kafka", "PVE", new Partition(3)), new SystemStreamPartition("kafka", "BOB", new Partition(3)))).put(new TaskName("Partition 2"), ImmutableSet.of(new SystemStreamPartition("kafka", "PVE", new Partition(2)), new SystemStreamPartition("kafka", "PVE", new Partition(6)), new SystemStreamPartition("kafka", "BOB", new Partition(2)))).put(new TaskName("Partition 5"), ImmutableSet.of(new SystemStreamPartition("kafka", "BOB", new Partition(5)))).put(new TaskName("Partition 4"), ImmutableSet.of(new SystemStreamPartition("kafka", "BOB", new Partition(4)))).put(new TaskName("Partition 7"), ImmutableSet.of(new SystemStreamPartition("kafka", "BOB", new Partition(7)))).put(new TaskName("Partition 6"), ImmutableSet.of(new SystemStreamPartition("kafka", "BOB", new Partition(6)))).build();
    // expected grouping
    Map<TaskName, Set<SystemStreamPartition>> expectedGroupingForStateless = ImmutableMap.<TaskName, Set<SystemStreamPartition>>builder().put(new TaskName("Partition 1"), ImmutableSet.of(new SystemStreamPartition("kafka", "PVE", new Partition(1)), new SystemStreamPartition("kafka", "BOB", new Partition(1)))).put(new TaskName("Partition 0"), ImmutableSet.of(new SystemStreamPartition("kafka", "PVE", new Partition(0)), new SystemStreamPartition("kafka", "BOB", new Partition(0)))).put(new TaskName("Partition 3"), ImmutableSet.of(new SystemStreamPartition("kafka", "PVE", new Partition(3)), new SystemStreamPartition("kafka", "BOB", new Partition(3)))).put(new TaskName("Partition 2"), ImmutableSet.of(new SystemStreamPartition("kafka", "PVE", new Partition(2)), new SystemStreamPartition("kafka", "BOB", new Partition(2)))).put(new TaskName("Partition 5"), ImmutableSet.of(new SystemStreamPartition("kafka", "BOB", new Partition(5)), new SystemStreamPartition("kafka", "PVE", new Partition(5)))).put(new TaskName("Partition 4"), ImmutableSet.of(new SystemStreamPartition("kafka", "BOB", new Partition(4)), new SystemStreamPartition("kafka", "PVE", new Partition(4)))).put(new TaskName("Partition 7"), ImmutableSet.of(new SystemStreamPartition("kafka", "BOB", new Partition(7)), new SystemStreamPartition("kafka", "PVE", new Partition(7)))).put(new TaskName("Partition 6"), ImmutableSet.of(new SystemStreamPartition("kafka", "BOB", new Partition(6)), new SystemStreamPartition("kafka", "PVE", new Partition(6)))).build();
    // SSPGrouperProxy for stateful job
    SSPGrouperProxy groupByPartition = buildSspGrouperProxy(true);
    GrouperMetadata grouperMetadata = new GrouperMetadataImpl(new HashMap<>(), new HashMap<>(), prevGroupingWithMultipleStreams, new HashMap<>());
    Map<TaskName, Set<SystemStreamPartition>> finalGrouping = groupByPartition.group(currSsps, grouperMetadata);
    Assert.assertEquals(expectedGroupingForStateful, finalGrouping);
    // SSPGrouperProxy for stateless job
    groupByPartition = buildSspGrouperProxy(false);
    finalGrouping = groupByPartition.group(currSsps, grouperMetadata);
    Assert.assertEquals(expectedGroupingForStateless, finalGrouping);
}
Also used : IntStream(java.util.stream.IntStream) StorageConfig(org.apache.samza.config.StorageConfig) ImmutableSet(com.google.common.collect.ImmutableSet) TaskName(org.apache.samza.container.TaskName) ImmutableMap(com.google.common.collect.ImmutableMap) Partition(org.apache.samza.Partition) Set(java.util.Set) GrouperMetadataImpl(org.apache.samza.container.grouper.task.GrouperMetadataImpl) HashMap(java.util.HashMap) Test(org.junit.Test) SystemStreamPartition(org.apache.samza.system.SystemStreamPartition) Collectors(java.util.stream.Collectors) HashSet(java.util.HashSet) List(java.util.List) ImmutableList(com.google.common.collect.ImmutableList) Map(java.util.Map) GrouperMetadata(org.apache.samza.container.grouper.task.GrouperMetadata) Assert(org.junit.Assert) MapConfig(org.apache.samza.config.MapConfig) Partition(org.apache.samza.Partition) SystemStreamPartition(org.apache.samza.system.SystemStreamPartition) ImmutableSet(com.google.common.collect.ImmutableSet) Set(java.util.Set) HashSet(java.util.HashSet) GrouperMetadataImpl(org.apache.samza.container.grouper.task.GrouperMetadataImpl) TaskName(org.apache.samza.container.TaskName) List(java.util.List) ImmutableList(com.google.common.collect.ImmutableList) GrouperMetadata(org.apache.samza.container.grouper.task.GrouperMetadata) SystemStreamPartition(org.apache.samza.system.SystemStreamPartition) Test(org.junit.Test)

Aggregations

GrouperMetadata (org.apache.samza.container.grouper.task.GrouperMetadata)17 GrouperMetadataImpl (org.apache.samza.container.grouper.task.GrouperMetadataImpl)14 List (java.util.List)12 Set (java.util.Set)12 TaskName (org.apache.samza.container.TaskName)12 SystemStreamPartition (org.apache.samza.system.SystemStreamPartition)12 HashMap (java.util.HashMap)11 Map (java.util.Map)11 Collectors (java.util.stream.Collectors)11 ImmutableList (com.google.common.collect.ImmutableList)10 ImmutableSet (com.google.common.collect.ImmutableSet)10 Partition (org.apache.samza.Partition)10 MapConfig (org.apache.samza.config.MapConfig)10 Test (org.junit.Test)10 ImmutableMap (com.google.common.collect.ImmutableMap)9 IntStream (java.util.stream.IntStream)9 StorageConfig (org.apache.samza.config.StorageConfig)9 Assert (org.junit.Assert)9 HashSet (java.util.HashSet)7 JobModel (org.apache.samza.job.model.JobModel)5