Search in sources :

Example 31 with Partition

use of org.apache.samza.Partition in project samza by apache.

the class TestDirectoryPartitioner method testBasicGrouping.

/**
 * Builds nine fake avro file entries, partitions them with a group pattern containing an
 * {@code [id]} token, and checks that three partitions are produced. The resulting partition
 * descriptor is then handed to {@code verifyPartitionDescriptor} together with the expected
 * index grouping.
 */
@Test
public void testBasicGrouping() {
    String[] inputFiles = { "00_10-run_2016-08-15-13-04-part.0.150582.avro", "00_10-run_2016-08-15-13-04-part.1.138132.avro", "00_10-run_2016-08-15-13-04-part.2.214005.avro", "00_10-run_2016-08-15-13-05-part.0.205738.avro", "00_10-run_2016-08-15-13-05-part.1.158273.avro", "00_10-run_2016-08-15-13-05-part.2.982345.avro", "00_10-run_2016-08-15-13-06-part.0.313245.avro", "00_10-run_2016-08-15-13-06-part.1.234212.avro", "00_10-run_2016-08-15-13-06-part.2.413232.avro" };
    long[] fileLength = { 150582, 138132, 214005, 205738, 158273, 982345, 313245, 234212, 413232 };

    // One FileMetadata per (name, length) pair, in the same order as the arrays above.
    final int inputCount = 9;
    List<FileMetadata> fileMetadataList = new ArrayList<>();
    int index = 0;
    while (index < inputCount) {
        fileMetadataList.add(new FileMetadata(inputFiles[index], fileLength[index]));
        index++;
    }

    // Accept every avro file, reject nothing.
    String whiteList = ".*\\.avro";
    String blackList = "";
    // Example file name: 00_10-run_2016-08-15-13-04-part.[id].138132.avro
    String groupPattern = ".*part\\.[id]\\..*\\.avro";

    final int expectedPartitionCount = 3;
    int[][] expectedPartitioning = {
        // files at index 0, 3, 6 should be grouped into one partition
        { 0, 3, 6 },
        // likewise for the next two groups
        { 1, 4, 7 },
        { 2, 5, 8 }
    };

    TestFileSystemAdapter fileSystemAdapter = new TestFileSystemAdapter(fileMetadataList);
    DirectoryPartitioner directoryPartitioner = new DirectoryPartitioner(whiteList, blackList, groupPattern, fileSystemAdapter);

    Map<Partition, SystemStreamPartitionMetadata> metadataMap = directoryPartitioner.getPartitionMetadataMap("hdfs", null);
    Assert.assertEquals(expectedPartitionCount, metadataMap.size());

    Map<Partition, List<String>> descriptorMap = directoryPartitioner.getPartitionDescriptor("hdfs");
    verifyPartitionDescriptor(inputFiles, expectedPartitioning, expectedPartitionCount, descriptorMap);
}
Also used : Partition(org.apache.samza.Partition) ArrayList(java.util.ArrayList) FileMetadata(org.apache.samza.system.hdfs.partitioner.FileSystemAdapter.FileMetadata) SystemStreamPartitionMetadata(org.apache.samza.system.SystemStreamMetadata.SystemStreamPartitionMetadata) List(java.util.List) ArrayList(java.util.ArrayList) Test(org.junit.Test)

Example 32 with Partition

use of org.apache.samza.Partition in project samza by apache.

the class TestHdfsFileSystemAdapter method testIntegrationWithPartitioner.

/**
 * Runs a {@code DirectoryPartitioner} backed by an {@code HdfsFileSystemAdapter} against the
 * {@code /partitioner} classpath resource directory, using whitelist {@code ".*"}, blacklist
 * {@code ".*02"} and no group pattern.
 *
 * <p>Expects exactly one partition, and expects the first file listed for partition 0 to end
 * with {@code testfile01}.
 *
 * @throws Exception if the partitioner or the underlying file system adapter fails
 */
@Test
public void testIntegrationWithPartitioner() throws Exception {
    URL url = this.getClass().getResource("/partitioner");
    // Fail with a readable message instead of a NullPointerException if the fixture is missing.
    Assert.assertNotNull("test resource /partitioner not found on classpath", url);
    String whiteList = ".*";
    String blackList = ".*02";
    String groupPattern = "";
    // Use the path verbatim. Routing it through String.format would interpret any '%' in the
    // path (e.g. the "%20" of a URL-encoded space) as a format specifier and throw.
    String streamName = url.getPath();
    DirectoryPartitioner directoryPartitioner = new DirectoryPartitioner(whiteList, blackList, groupPattern, new HdfsFileSystemAdapter());
    Map<Partition, SystemStreamMetadata.SystemStreamPartitionMetadata> metadataMap = directoryPartitioner.getPartitionMetadataMap(streamName, null);
    Assert.assertEquals(1, metadataMap.size());
    Map<Partition, List<String>> descriptorMap = directoryPartitioner.getPartitionDescriptor(streamName);
    Assert.assertEquals(1, descriptorMap.size());
    Assert.assertTrue(descriptorMap.get(new Partition(0)).get(0).endsWith("testfile01"));
}
Also used : Partition(org.apache.samza.Partition) List(java.util.List) URL(java.net.URL) Test(org.junit.Test)

Example 33 with Partition

use of org.apache.samza.Partition in project samza by apache.

the class TestDirectoryPartitioner method testWhiteListBlackListFiltering.

/**
 * Feeds a mix of {@code part-*} and {@code delta-*} avro file entries to a
 * {@code DirectoryPartitioner} configured with a whitelist of {@code part-.*\.avro}, a blacklist
 * of {@code part-002.avro} and no group pattern. Checks that five partitions are produced, then
 * hands the partition descriptor to {@code verifyPartitionDescriptor} with the expected
 * one-file-per-partition layout (input indices 0, 2, 4, 6 and 8).
 */
@Test
public void testWhiteListBlackListFiltering() {
    String[] inputFiles = { "part-001.avro", "part-002.avro", "part-003.avro", "delta-01.avro", "part-005.avro", "delta-03.avro", "part-004.avro", "delta-02.avro", "part-006.avro" };
    long[] fileLength = { 150582, 138132, 214005, 205738, 158273, 982345, 313245, 234212, 413232 };

    // One FileMetadata per (name, length) pair, in the same order as the arrays above.
    final int inputCount = 9;
    List<FileMetadata> fileMetadataList = new ArrayList<>();
    int index = 0;
    while (index < inputCount) {
        fileMetadataList.add(new FileMetadata(inputFiles[index], fileLength[index]));
        index++;
    }

    String whiteList = "part-.*\\.avro";
    String blackList = "part-002.avro";
    String groupPattern = "";

    final int expectedPartitionCount = 5;
    int[][] expectedPartitioning = { { 0 }, { 2 }, { 4 }, { 6 }, { 8 } };

    TestFileSystemAdapter fileSystemAdapter = new TestFileSystemAdapter(fileMetadataList);
    DirectoryPartitioner directoryPartitioner = new DirectoryPartitioner(whiteList, blackList, groupPattern, fileSystemAdapter);

    Map<Partition, SystemStreamPartitionMetadata> metadataMap = directoryPartitioner.getPartitionMetadataMap("hdfs", null);
    Assert.assertEquals(expectedPartitionCount, metadataMap.size());

    Map<Partition, List<String>> descriptorMap = directoryPartitioner.getPartitionDescriptor("hdfs");
    verifyPartitionDescriptor(inputFiles, expectedPartitioning, expectedPartitionCount, descriptorMap);
}
Also used : Partition(org.apache.samza.Partition) ArrayList(java.util.ArrayList) FileMetadata(org.apache.samza.system.hdfs.partitioner.FileSystemAdapter.FileMetadata) SystemStreamPartitionMetadata(org.apache.samza.system.SystemStreamMetadata.SystemStreamPartitionMetadata) List(java.util.List) ArrayList(java.util.ArrayList) Test(org.junit.Test)

Example 34 with Partition

use of org.apache.samza.Partition in project samza by apache.

the class TestStorageRecovery method putMetadata.

/**
 * Initializes the {@code systemStreamMetadata} and {@code inputSystemStreamMetadata} fields.
 * Each one gets its own map holding partitions 0 and 1, and every partition maps to the same
 * {@code SystemStreamPartitionMetadata("0", "1", "2")} instance.
 */
private void putMetadata() {
    SystemStreamMetadata.SystemStreamPartitionMetadata partitionMetadata = new SystemStreamMetadata.SystemStreamPartitionMetadata("0", "1", "2");

    HashMap<Partition, SystemStreamPartitionMetadata> streamPartitions = new HashMap<>();
    HashMap<Partition, SystemStreamPartitionMetadata> inputPartitions = new HashMap<>();
    // Both streams expose the same two partitions; each map gets its own Partition keys.
    for (int partitionId = 0; partitionId < 2; partitionId++) {
        streamPartitions.put(new Partition(partitionId), partitionMetadata);
        inputPartitions.put(new Partition(partitionId), partitionMetadata);
    }

    systemStreamMetadata = new SystemStreamMetadata(SYSTEM_STREAM_NAME, streamPartitions);
    inputSystemStreamMetadata = new SystemStreamMetadata(INPUT_STREAM, inputPartitions);
}
Also used : Partition(org.apache.samza.Partition) SystemStreamPartition(org.apache.samza.system.SystemStreamPartition) SystemStreamPartitionMetadata(org.apache.samza.system.SystemStreamMetadata.SystemStreamPartitionMetadata) HashMap(java.util.HashMap) SystemStreamMetadata(org.apache.samza.system.SystemStreamMetadata) SystemStreamPartitionMetadata(org.apache.samza.system.SystemStreamMetadata.SystemStreamPartitionMetadata)

Example 35 with Partition

use of org.apache.samza.Partition in project samza by apache.

the class TestAsyncRunLoop method testEndOfStreamOffsetManagement.

// Wires two tasks, a mocked consumer and a mocked OffsetManager into an AsyncRunLoop and runs it.
// The mocked consumer only ever delivers messages for ssp2: two regular envelopes followed by an
// end-of-stream envelope. The method currently verifies nothing after the loop returns.
// TODO: Add assertions.
// NOTE: the @Test annotation below is commented out, so this method is not picked up as a test.
//@Test
public void testEndOfStreamOffsetManagement() throws Exception {
    //explicitly configure to disable commits inside process or window calls and invoke commit from end of stream
    // NOTE(review): the meaning of each TestTask constructor flag is not visible here — confirm against TestTask.
    TestTask mockStreamTask1 = new TestTask(true, false, false, null);
    TestTask mockStreamTask2 = new TestTask(true, false, false, null);
    Partition p1 = new Partition(1);
    Partition p2 = new Partition(2);
    SystemStreamPartition ssp1 = new SystemStreamPartition("system1", "stream1", p1);
    SystemStreamPartition ssp2 = new SystemStreamPartition("system1", "stream2", p2);
    // All three envelopes belong to ssp2; the second constructor argument is presumably the offset — confirm.
    IncomingMessageEnvelope envelope1 = new IncomingMessageEnvelope(ssp2, "1", "key1", "message1");
    IncomingMessageEnvelope envelope2 = new IncomingMessageEnvelope(ssp2, "2", "key1", "message1");
    IncomingMessageEnvelope envelope3 = IncomingMessageEnvelope.buildEndOfStreamEnvelope(ssp2);
    // Poll result: only ssp2 has an entry, so ssp1 never receives a message from the mocked consumer.
    Map<SystemStreamPartition, List<IncomingMessageEnvelope>> sspMap = new HashMap<>();
    List<IncomingMessageEnvelope> messageList = new ArrayList<>();
    messageList.add(envelope1);
    messageList.add(envelope2);
    messageList.add(envelope3);
    sspMap.put(ssp2, messageList);
    // The mocked consumer returns the same map for every poll call, regardless of arguments.
    SystemConsumer mockConsumer = mock(SystemConsumer.class);
    when(mockConsumer.poll(anyObject(), anyLong())).thenReturn(sspMap);
    HashMap<String, SystemConsumer> systemConsumerMap = new HashMap<>();
    systemConsumerMap.put("system1", mockConsumer);
    SystemConsumers consumers = TestSystemConsumers.getSystemConsumers(systemConsumerMap);
    TaskName taskName1 = new TaskName("task1");
    TaskName taskName2 = new TaskName("task2");
    // NOTE(review): taskNames is populated but not read again within this method.
    Set<TaskName> taskNames = new HashSet<>();
    taskNames.add(taskName1);
    taskNames.add(taskName2);
    // Stub offsets per (task, ssp): task1/ssp1 starts at END_OF_STREAM_OFFSET, task2/ssp2 starts at "1".
    OffsetManager offsetManager = mock(OffsetManager.class);
    when(offsetManager.getLastProcessedOffset(taskName1, ssp1)).thenReturn(Option.apply("3"));
    when(offsetManager.getLastProcessedOffset(taskName2, ssp2)).thenReturn(Option.apply("0"));
    when(offsetManager.getStartingOffset(taskName1, ssp1)).thenReturn(Option.apply(IncomingMessageEnvelope.END_OF_STREAM_OFFSET));
    when(offsetManager.getStartingOffset(taskName2, ssp2)).thenReturn(Option.apply("1"));
    // One task instance per ssp, both sharing the mocked offset manager and the consumers above.
    TaskInstance taskInstance1 = createTaskInstance(mockStreamTask1, taskName1, ssp1, offsetManager, consumers);
    TaskInstance taskInstance2 = createTaskInstance(mockStreamTask2, taskName2, ssp2, offsetManager, consumers);
    Map<TaskName, TaskInstance> tasks = new HashMap<>();
    tasks.put(taskName1, taskInstance1);
    tasks.put(taskName2, taskInstance2);
    // Consumers are registered by both task instances before the SystemConsumers are started.
    taskInstance1.registerConsumers();
    taskInstance2.registerConsumers();
    consumers.start();
    int maxMessagesInFlight = 1;
    // executor, windowMs, commitMs, callbackTimeoutMs, maxThrottlingDelayMs and containerMetrics are
    // declared outside this method. NOTE(review): `() -> 0L` looks like a fixed clock and the trailing
    // `false` is an unnamed flag — confirm both against the AsyncRunLoop constructor.
    AsyncRunLoop runLoop = new AsyncRunLoop(tasks, executor, consumers, maxMessagesInFlight, windowMs, commitMs, callbackTimeoutMs, maxThrottlingDelayMs, containerMetrics, () -> 0L, false);
    runLoop.run();
}
Also used : SystemStreamPartition(org.apache.samza.system.SystemStreamPartition) Partition(org.apache.samza.Partition) SystemConsumer(org.apache.samza.system.SystemConsumer) TaskInstance(org.apache.samza.container.TaskInstance) SystemConsumers(org.apache.samza.system.SystemConsumers) TestSystemConsumers(org.apache.samza.system.TestSystemConsumers) HashMap(java.util.HashMap) IncomingMessageEnvelope(org.apache.samza.system.IncomingMessageEnvelope) OffsetManager(org.apache.samza.checkpoint.OffsetManager) ArrayList(java.util.ArrayList) TaskName(org.apache.samza.container.TaskName) ArrayList(java.util.ArrayList) List(java.util.List) SystemStreamPartition(org.apache.samza.system.SystemStreamPartition) HashSet(java.util.HashSet)

Aggregations

Partition (org.apache.samza.Partition): 42, Test (org.junit.Test): 31, SystemStreamPartition (org.apache.samza.system.SystemStreamPartition): 30, List (java.util.List): 15, HashMap (java.util.HashMap): 13, IncomingMessageEnvelope (org.apache.samza.system.IncomingMessageEnvelope): 11, ArrayList (java.util.ArrayList): 10, SystemStreamPartitionMetadata (org.apache.samza.system.SystemStreamMetadata.SystemStreamPartitionMetadata): 8, HashSet (java.util.HashSet): 7, FileMetadata (org.apache.samza.system.hdfs.partitioner.FileSystemAdapter.FileMetadata): 7, GenericRecord (org.apache.avro.generic.GenericRecord): 6, TaskName (org.apache.samza.container.TaskName): 6, SamzaException (org.apache.samza.SamzaException): 5, Config (org.apache.samza.config.Config): 5, SystemStreamMetadata (org.apache.samza.system.SystemStreamMetadata): 5, SystemStream (org.apache.samza.system.SystemStream): 4, LinkedHashMap (java.util.LinkedHashMap): 3, MapConfig (org.apache.samza.config.MapConfig): 3, SinglePartitionWithoutOffsetsSystemAdmin (org.apache.samza.util.SinglePartitionWithoutOffsetsSystemAdmin): 3, MetricsRegistryMap (org.apache.samza.metrics.MetricsRegistryMap): 2