use of org.apache.samza.Partition in project samza by apache.
the class TestDirectoryPartitioner method testBasicGrouping.
/**
 * Feeds nine avro file names (three runs, each with part.0, part.1 and part.2) to a
 * DirectoryPartitioner configured with a "[id]" group pattern and checks the resulting grouping.
 *
 * The test expects three partitions, each holding the files that share the same number
 * after "part." (input indices {0,3,6}, {1,4,7} and {2,5,8}).
 */
@Test
public void testBasicGrouping() {
  String[] inputFiles = {
    "00_10-run_2016-08-15-13-04-part.0.150582.avro",
    "00_10-run_2016-08-15-13-04-part.1.138132.avro",
    "00_10-run_2016-08-15-13-04-part.2.214005.avro",
    "00_10-run_2016-08-15-13-05-part.0.205738.avro",
    "00_10-run_2016-08-15-13-05-part.1.158273.avro",
    "00_10-run_2016-08-15-13-05-part.2.982345.avro",
    "00_10-run_2016-08-15-13-06-part.0.313245.avro",
    "00_10-run_2016-08-15-13-06-part.1.234212.avro",
    "00_10-run_2016-08-15-13-06-part.2.413232.avro"
  };
  long[] fileLength = { 150582, 138132, 214005, 205738, 158273, 982345, 313245, 234212, 413232 };

  // Pair every file name with its length; both arrays have nine entries.
  List<FileMetadata> fileMetadataList = new ArrayList<>();
  for (int idx = 0; idx < inputFiles.length; idx++) {
    fileMetadataList.add(new FileMetadata(inputFiles[idx], fileLength[idx]));
  }

  String whiteList = ".*\\.avro";
  String blackList = "";
  // Shape of the names being grouped: 00_10-run_2016-08-15-13-04-part.[id].138132.avro
  String groupPattern = ".*part\\.[id]\\..*\\.avro";

  int expectedPartitionCount = 3;
  // Each row lists the indices into inputFiles that are expected to share one partition.
  int[][] expectedGrouping = {
    { 0, 3, 6 },
    { 1, 4, 7 },
    { 2, 5, 8 }
  };

  DirectoryPartitioner partitioner =
      new DirectoryPartitioner(whiteList, blackList, groupPattern, new TestFileSystemAdapter(fileMetadataList));

  Map<Partition, SystemStreamPartitionMetadata> partitionMetadata = partitioner.getPartitionMetadataMap("hdfs", null);
  Assert.assertEquals(expectedPartitionCount, partitionMetadata.size());

  Map<Partition, List<String>> descriptorMap = partitioner.getPartitionDescriptor("hdfs");
  verifyPartitionDescriptor(inputFiles, expectedGrouping, expectedPartitionCount, descriptorMap);
}
use of org.apache.samza.Partition in project samza by apache.
the class TestHdfsFileSystemAdapter method testIntegrationWithPartitioner.
/**
 * Runs a DirectoryPartitioner backed by an HdfsFileSystemAdapter against the "/partitioner"
 * classpath resource directory.
 *
 * With a white list of ".*" and a black list of ".*02", the test expects exactly one partition
 * whose first file path ends with "testfile01".
 *
 * @throws Exception if the partitioner or the file system adapter fails
 */
@Test
public void testIntegrationWithPartitioner() throws Exception {
  URL url = this.getClass().getResource("/partitioner");
  // Fail with a readable message instead of a NullPointerException when the fixture is absent.
  Assert.assertNotNull("test resource directory /partitioner was not found on the classpath", url);

  String whiteList = ".*";
  String blackList = ".*02";
  String groupPattern = "";

  // Use the path verbatim. It must not go through String.format: URL paths are percent-encoded,
  // so any '%' (e.g. "%20" for a space in the checkout directory) would be parsed as a format
  // specifier and either throw or silently alter the path.
  String streamName = url.getPath();

  DirectoryPartitioner directoryPartitioner =
      new DirectoryPartitioner(whiteList, blackList, groupPattern, new HdfsFileSystemAdapter());

  Map<Partition, SystemStreamMetadata.SystemStreamPartitionMetadata> metadataMap =
      directoryPartitioner.getPartitionMetadataMap(streamName, null);
  Assert.assertEquals(1, metadataMap.size());

  Map<Partition, List<String>> descriptorMap = directoryPartitioner.getPartitionDescriptor(streamName);
  Assert.assertEquals(1, descriptorMap.values().size());
  Assert.assertTrue(descriptorMap.get(new Partition(0)).get(0).endsWith("testfile01"));
}
use of org.apache.samza.Partition in project samza by apache.
the class TestDirectoryPartitioner method testWhiteListBlackListFiltering.
/**
 * Checks white-list / black-list filtering with an empty group pattern.
 *
 * Of the nine input files, the white list "part-.*\.avro" matches the six "part-" files and the
 * black list names "part-002.avro". The test expects the remaining five files
 * (input indices 0, 2, 4, 6 and 8) to each land in a partition of their own.
 */
@Test
public void testWhiteListBlackListFiltering() {
  String[] inputFiles = {
    "part-001.avro",
    "part-002.avro",
    "part-003.avro",
    "delta-01.avro",
    "part-005.avro",
    "delta-03.avro",
    "part-004.avro",
    "delta-02.avro",
    "part-006.avro"
  };
  long[] fileLength = { 150582, 138132, 214005, 205738, 158273, 982345, 313245, 234212, 413232 };

  // Pair every file name with its length; both arrays have nine entries.
  List<FileMetadata> fileMetadataList = new ArrayList<>();
  for (int idx = 0; idx < inputFiles.length; idx++) {
    fileMetadataList.add(new FileMetadata(inputFiles[idx], fileLength[idx]));
  }

  String whiteList = "part-.*\\.avro";
  String blackList = "part-002.avro";
  String groupPattern = "";

  int expectedPartitionCount = 5;
  // One single-file partition per surviving input, identified by its index in inputFiles.
  int[][] expectedGrouping = { { 0 }, { 2 }, { 4 }, { 6 }, { 8 } };

  DirectoryPartitioner partitioner =
      new DirectoryPartitioner(whiteList, blackList, groupPattern, new TestFileSystemAdapter(fileMetadataList));

  Map<Partition, SystemStreamPartitionMetadata> partitionMetadata = partitioner.getPartitionMetadataMap("hdfs", null);
  Assert.assertEquals(expectedPartitionCount, partitionMetadata.size());

  Map<Partition, List<String>> descriptorMap = partitioner.getPartitionDescriptor("hdfs");
  verifyPartitionDescriptor(inputFiles, expectedGrouping, expectedPartitionCount, descriptorMap);
}
use of org.apache.samza.Partition in project samza by apache.
the class TestStorageRecovery method putMetadata.
/**
 * Populates the systemStreamMetadata and inputSystemStreamMetadata fields.
 *
 * Each stream gets its own map covering partitions 0 and 1; every entry points at the same
 * SystemStreamPartitionMetadata instance, built from the arguments "0", "1" and "2".
 */
private void putMetadata() {
  SystemStreamPartitionMetadata sharedPartitionMetadata =
      new SystemStreamMetadata.SystemStreamPartitionMetadata("0", "1", "2");

  // Two separate maps so that the two SystemStreamMetadata objects do not share a map instance.
  HashMap<Partition, SystemStreamPartitionMetadata> systemStreamPartitions = new HashMap<>();
  HashMap<Partition, SystemStreamPartitionMetadata> inputStreamPartitions = new HashMap<>();
  for (int partitionId = 0; partitionId < 2; partitionId++) {
    systemStreamPartitions.put(new Partition(partitionId), sharedPartitionMetadata);
    inputStreamPartitions.put(new Partition(partitionId), sharedPartitionMetadata);
  }

  systemStreamMetadata = new SystemStreamMetadata(SYSTEM_STREAM_NAME, systemStreamPartitions);
  inputSystemStreamMetadata = new SystemStreamMetadata(INPUT_STREAM, inputStreamPartitions);
}
use of org.apache.samza.Partition in project samza by apache.
the class TestAsyncRunLoop method testEndOfStreamOffsetManagement.
// Runs an AsyncRunLoop over two tasks while the mocked consumer returns two messages followed by
// an end-of-stream envelope for the second task's partition.
// This method is currently disabled: its @Test annotation is commented out and it makes no
// assertions yet.
// TODO: Add assertions.
//@Test
public void testEndOfStreamOffsetManagement() throws Exception {
  // Configured so that commits are not issued from process or window calls; the commit is
  // invoked from end of stream instead.
  TestTask firstTask = new TestTask(true, false, false, null);
  TestTask secondTask = new TestTask(true, false, false, null);

  SystemStreamPartition firstSsp = new SystemStreamPartition("system1", "stream1", new Partition(1));
  SystemStreamPartition secondSsp = new SystemStreamPartition("system1", "stream2", new Partition(2));

  // Only the second partition receives data: offsets "1" and "2", then the end-of-stream marker.
  List<IncomingMessageEnvelope> secondSspEnvelopes = new ArrayList<>();
  secondSspEnvelopes.add(new IncomingMessageEnvelope(secondSsp, "1", "key1", "message1"));
  secondSspEnvelopes.add(new IncomingMessageEnvelope(secondSsp, "2", "key1", "message1"));
  secondSspEnvelopes.add(IncomingMessageEnvelope.buildEndOfStreamEnvelope(secondSsp));

  Map<SystemStreamPartition, List<IncomingMessageEnvelope>> polledEnvelopes = new HashMap<>();
  polledEnvelopes.put(secondSsp, secondSspEnvelopes);

  // Every poll on the mocked consumer returns the same envelope map.
  SystemConsumer consumerMock = mock(SystemConsumer.class);
  when(consumerMock.poll(anyObject(), anyLong())).thenReturn(polledEnvelopes);

  HashMap<String, SystemConsumer> consumersBySystem = new HashMap<>();
  consumersBySystem.put("system1", consumerMock);
  SystemConsumers consumers = TestSystemConsumers.getSystemConsumers(consumersBySystem);

  TaskName firstTaskName = new TaskName("task1");
  TaskName secondTaskName = new TaskName("task2");
  // Collected here but not read again later in this method.
  Set<TaskName> taskNames = new HashSet<>();
  taskNames.add(firstTaskName);
  taskNames.add(secondTaskName);

  // Stub the offsets the task instances will ask for; the first task starts at end of stream.
  OffsetManager offsetManager = mock(OffsetManager.class);
  when(offsetManager.getLastProcessedOffset(firstTaskName, firstSsp)).thenReturn(Option.apply("3"));
  when(offsetManager.getLastProcessedOffset(secondTaskName, secondSsp)).thenReturn(Option.apply("0"));
  when(offsetManager.getStartingOffset(firstTaskName, firstSsp))
      .thenReturn(Option.apply(IncomingMessageEnvelope.END_OF_STREAM_OFFSET));
  when(offsetManager.getStartingOffset(secondTaskName, secondSsp)).thenReturn(Option.apply("1"));

  TaskInstance firstInstance = createTaskInstance(firstTask, firstTaskName, firstSsp, offsetManager, consumers);
  TaskInstance secondInstance = createTaskInstance(secondTask, secondTaskName, secondSsp, offsetManager, consumers);

  Map<TaskName, TaskInstance> tasks = new HashMap<>();
  tasks.put(firstTaskName, firstInstance);
  tasks.put(secondTaskName, secondInstance);

  // Register both tasks with the consumers before starting them.
  firstInstance.registerConsumers();
  secondInstance.registerConsumers();
  consumers.start();

  int maxMessagesInFlight = 1;
  AsyncRunLoop runLoop = new AsyncRunLoop(
      tasks,
      executor,
      consumers,
      maxMessagesInFlight,
      windowMs,
      commitMs,
      callbackTimeoutMs,
      maxThrottlingDelayMs,
      containerMetrics,
      () -> 0L,
      false);
  runLoop.run();
}
Aggregations