use of org.apache.samza.system.SystemStreamMetadata.SystemStreamPartitionMetadata in project samza by apache.
the class TestDirectoryPartitioner method testBasicGrouping.
@Test
public void testBasicGrouping() {
List<FileMetadata> testList = new ArrayList<>();
int numInput = 9;
String[] inputFiles = { "00_10-run_2016-08-15-13-04-part.0.150582.avro", "00_10-run_2016-08-15-13-04-part.1.138132.avro", "00_10-run_2016-08-15-13-04-part.2.214005.avro", "00_10-run_2016-08-15-13-05-part.0.205738.avro", "00_10-run_2016-08-15-13-05-part.1.158273.avro", "00_10-run_2016-08-15-13-05-part.2.982345.avro", "00_10-run_2016-08-15-13-06-part.0.313245.avro", "00_10-run_2016-08-15-13-06-part.1.234212.avro", "00_10-run_2016-08-15-13-06-part.2.413232.avro" };
long[] fileLength = { 150582, 138132, 214005, 205738, 158273, 982345, 313245, 234212, 413232 };
for (int i = 0; i < numInput; i++) {
testList.add(new FileMetadata(inputFiles[i], fileLength[i]));
}
String whiteList = ".*\\.avro";
String blackList = "";
// 00_10-run_2016-08-15-13-04-part.[id].138132.avro
String groupPattern = ".*part\\.[id]\\..*\\.avro";
int expectedNumPartition = 3;
int[][] expectedPartitioning = {
// files at indices 0, 3, 6 share part id 0 and should be grouped into one partition
{ 0, 3, 6 },
// similarly for part ids 1 and 2
{ 1, 4, 7 }, { 2, 5, 8 } };
DirectoryPartitioner directoryPartitioner = new DirectoryPartitioner(whiteList, blackList, groupPattern, new TestFileSystemAdapter(testList));
Map<Partition, SystemStreamPartitionMetadata> metadataMap = directoryPartitioner.getPartitionMetadataMap("hdfs", null);
Assert.assertEquals(expectedNumPartition, metadataMap.size());
Map<Partition, List<String>> descriptorMap = directoryPartitioner.getPartitionDescriptor("hdfs");
verifyPartitionDescriptor(inputFiles, expectedPartitioning, expectedNumPartition, descriptorMap);
}
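In the group pattern above, [id] acts as a placeholder for the part index, so files that differ only in their timestamp segment land in the same partition. The standalone sketch below illustrates that grouping with plain regex by substituting a capturing group for the placeholder; it is an assumed illustration of the behavior, not the DirectoryPartitioner internals.
import java.util.*;
import java.util.regex.*;

public class GroupPatternSketch {
  public static void main(String[] args) {
    String[] files = {
        "00_10-run_2016-08-15-13-04-part.0.150582.avro",
        "00_10-run_2016-08-15-13-05-part.1.158273.avro",
        "00_10-run_2016-08-15-13-06-part.0.313245.avro" };
    // Substitute the [id] placeholder in ".*part\.[id]\..*\.avro" with a
    // capturing group, then bucket file names by the captured part index.
    Pattern pattern = Pattern.compile(".*part\\.(\\d+)\\..*\\.avro");
    Map<String, List<String>> groups = new TreeMap<>();
    for (String file : files) {
      Matcher m = pattern.matcher(file);
      if (m.matches()) {
        groups.computeIfAbsent(m.group(1), k -> new ArrayList<>()).add(file);
      }
    }
    // Prints: part id 0 -> two files, part id 1 -> one file
    groups.forEach((id, members) -> System.out.println(id + " -> " + members));
  }
}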
use of org.apache.samza.system.SystemStreamMetadata.SystemStreamPartitionMetadata in project samza by apache.
the class TestDirectoryPartitioner method testBasicWhiteListFiltering.
@Test
public void testBasicWhiteListFiltering() {
List<FileMetadata> testList = new ArrayList<>();
int numInput = 9;
String[] inputFiles = { "part-001.avro", "part-002.avro", "part-003.avro", "delta-01.avro", "part-005.avro", "delta-03.avro", "part-004.avro", "delta-02.avro", "part-006.avro" };
long[] fileLength = { 150582, 138132, 214005, 205738, 158273, 982345, 313245, 234212, 413232 };
for (int i = 0; i < numInput; i++) {
testList.add(new FileMetadata(inputFiles[i], fileLength[i]));
}
String whiteList = "part-.*\\.avro";
String blackList = "";
String groupPattern = "";
int expectedNumPartition = 6;
int[][] expectedPartitioning = { { 0 }, { 1 }, { 2 }, { 4 }, { 6 }, { 8 } };
DirectoryPartitioner directoryPartitioner = new DirectoryPartitioner(whiteList, blackList, groupPattern, new TestFileSystemAdapter(testList));
Map<Partition, SystemStreamPartitionMetadata> metadataMap = directoryPartitioner.getPartitionMetadataMap("hdfs", null);
Assert.assertEquals(expectedNumPartition, metadataMap.size());
Map<Partition, List<String>> descriptorMap = directoryPartitioner.getPartitionDescriptor("hdfs");
verifyPartitionDescriptor(inputFiles, expectedPartitioning, expectedNumPartition, descriptorMap);
}
use of org.apache.samza.system.SystemStreamMetadata.SystemStreamPartitionMetadata in project samza by apache.
the class TestDirectoryPartitioner method testWhiteListBlackListFiltering.
@Test
public void testWhiteListBlackListFiltering() {
List<FileMetadata> testList = new ArrayList<>();
int numInput = 9;
String[] inputFiles = { "part-001.avro", "part-002.avro", "part-003.avro", "delta-01.avro", "part-005.avro", "delta-03.avro", "part-004.avro", "delta-02.avro", "part-006.avro" };
long[] fileLength = { 150582, 138132, 214005, 205738, 158273, 982345, 313245, 234212, 413232 };
for (int i = 0; i < numInput; i++) {
testList.add(new FileMetadata(inputFiles[i], fileLength[i]));
}
String whiteList = "part-.*\\.avro";
String blackList = "part-002.avro";
String groupPattern = "";
int expectedNumPartition = 5;
int[][] expectedPartitioning = { { 0 }, { 2 }, { 4 }, { 6 }, { 8 } };
DirectoryPartitioner directoryPartitioner = new DirectoryPartitioner(whiteList, blackList, groupPattern, new TestFileSystemAdapter(testList));
Map<Partition, SystemStreamPartitionMetadata> metadataMap = directoryPartitioner.getPartitionMetadataMap("hdfs", null);
Assert.assertEquals(expectedNumPartition, metadataMap.size());
Map<Partition, List<String>> descriptorMap = directoryPartitioner.getPartitionDescriptor("hdfs");
verifyPartitionDescriptor(inputFiles, expectedPartitioning, expectedNumPartition, descriptorMap);
}
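The two filtering tests above share the same include-then-exclude semantics: a file must match the whitelist and must not match the blacklist, and an empty blacklist excludes nothing. Below is a minimal standalone sketch of that rule (assumed behavior consistent with the expected partitionings, not the DirectoryPartitioner source).
import java.util.*;
import java.util.stream.*;

public class ListFilterSketch {
  // Keep files matching whiteList, then drop those matching blackList;
  // an empty blackList drops nothing.
  static List<String> filter(List<String> files, String whiteList, String blackList) {
    return files.stream()
        .filter(f -> f.matches(whiteList))
        .filter(f -> blackList.isEmpty() || !f.matches(blackList))
        .collect(Collectors.toList());
  }

  public static void main(String[] args) {
    List<String> files = Arrays.asList("part-001.avro", "part-002.avro", "delta-01.avro");
    // Prints [part-001.avro]: delta-01.avro fails the whitelist,
    // part-002.avro is blacklisted.
    System.out.println(filter(files, "part-.*\\.avro", "part-002.avro"));
  }
}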
use of org.apache.samza.system.SystemStreamMetadata.SystemStreamPartitionMetadata in project samza by apache.
the class TestRepartitionJoinWindowApp method ittestRepartitionJoinWindowAppAndDeleteMessagesOnCommit.
@Test
public void ittestRepartitionJoinWindowAppAndDeleteMessagesOnCommit() throws Exception {
String inputTopicName1 = "page-views2";
String inputTopicName2 = "ad-clicks2";
String outputTopicName = "user-ad-click-counts2";
initializeTopics(inputTopicName1, inputTopicName2, outputTopicName);
// run the application
RepartitionJoinWindowApp app = new RepartitionJoinWindowApp();
final String appName = "UserPageAdClickCounter2";
Map<String, String> configs = new HashMap<>();
configs.put(JobCoordinatorConfig.JOB_COORDINATOR_FACTORY, "org.apache.samza.standalone.PassthroughJobCoordinatorFactory");
configs.put(JobConfig.PROCESSOR_ID, "0");
configs.put(TaskConfig.GROUPER_FACTORY, "org.apache.samza.container.grouper.task.GroupByContainerIdsFactory");
configs.put("systems.kafka.samza.delete.committed.messages", "true");
configs.put(RepartitionJoinWindowApp.INPUT_TOPIC_1_CONFIG_KEY, inputTopicName1);
configs.put(RepartitionJoinWindowApp.INPUT_TOPIC_2_CONFIG_KEY, inputTopicName2);
configs.put(RepartitionJoinWindowApp.OUTPUT_TOPIC_CONFIG_KEY, outputTopicName);
runApplication(app, appName, configs);
// consume and validate result
List<ConsumerRecord<String, String>> messages = consumeMessages(outputTopicName, 2);
assertEquals(2, messages.size());
for (ConsumerRecord<String, String> message : messages) {
String key = message.key();
String value = message.value();
Assert.assertTrue(key.equals("u1") || key.equals("u2"));
assertEquals("2", value);
}
// Verify that messages in the intermediate stream are deleted within 10 seconds
long startTimeMs = System.currentTimeMillis();
for (String streamId : app.getIntermediateStreamIds()) {
long remainingMessageNum = -1;
while (remainingMessageNum != 0 && System.currentTimeMillis() - startTimeMs < 10000) {
remainingMessageNum = 0;
SystemStreamMetadata streamMetadata = (SystemStreamMetadata) systemAdmin.getSystemStreamMetadata(new HashSet<>(Arrays.asList(streamId)), new ExponentialSleepStrategy.Mock(3)).get(streamId);
for (Map.Entry<Partition, SystemStreamPartitionMetadata> entry : streamMetadata.getSystemStreamPartitionMetadata().entrySet()) {
SystemStreamPartitionMetadata metadata = entry.getValue();
remainingMessageNum += Long.parseLong(metadata.getUpcomingOffset()) - Long.parseLong(metadata.getOldestOffset());
}
}
assertEquals(0, remainingMessageNum);
}
}
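The polling loop above sizes a partition's backlog as upcomingOffset minus oldestOffset, so it reaches zero once the broker has deleted every committed message. A small sketch of that arithmetic, reusing the metadata class from this page:
import org.apache.samza.system.SystemStreamMetadata.SystemStreamPartitionMetadata;

public class BacklogSketch {
  public static void main(String[] args) {
    // Constructor arguments are (oldestOffset, newestOffset, upcomingOffset).
    // With oldest = 11 and upcoming = 21, 10 messages remain to be deleted.
    SystemStreamPartitionMetadata metadata = new SystemStreamPartitionMetadata("11", "20", "21");
    long backlog = Long.parseLong(metadata.getUpcomingOffset()) - Long.parseLong(metadata.getOldestOffset());
    System.out.println(backlog); // 10
  }
}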
use of org.apache.samza.system.SystemStreamMetadata.SystemStreamPartitionMetadata in project samza by apache.
the class TestTransactionalStateTaskRestoreManager method testGetStoreActionsForLoggedPersistentStore_FullRestoreIfCheckpointedOffsetOlderThanOldest.
/**
* This can happen if the changelog topic gets compacted and the local store offset was written prior to the
* compaction. If so, we do a full restore.
*/
@Test
public void testGetStoreActionsForLoggedPersistentStore_FullRestoreIfCheckpointedOffsetOlderThanOldest() {
TaskModel mockTaskModel = mock(TaskModel.class);
TaskName taskName = new TaskName("Partition 0");
when(mockTaskModel.getTaskName()).thenReturn(taskName);
Partition taskChangelogPartition = new Partition(0);
when(mockTaskModel.getChangelogPartition()).thenReturn(taskChangelogPartition);
String store1Name = "store1";
StorageEngine store1Engine = mock(StorageEngine.class);
StoreProperties mockStore1Properties = mock(StoreProperties.class);
when(store1Engine.getStoreProperties()).thenReturn(mockStore1Properties);
when(mockStore1Properties.isLoggedStore()).thenReturn(true);
when(mockStore1Properties.isPersistedToDisk()).thenReturn(true);
Map<String, StorageEngine> mockStoreEngines = ImmutableMap.of(store1Name, store1Engine);
String changelog1SystemName = "system1";
String changelog1StreamName = "store1Changelog";
SystemStream changelog1SystemStream = new SystemStream(changelog1SystemName, changelog1StreamName);
SystemStreamPartition changelog1SSP = new SystemStreamPartition(changelog1SystemStream, taskChangelogPartition);
// oldest offset > checkpointed changelog offset
SystemStreamPartitionMetadata changelog1SSPMetadata = new SystemStreamPartitionMetadata("11", "20", "21");
Map<String, SystemStream> mockStoreChangelogs = ImmutableMap.of(store1Name, changelog1SystemStream);
String changelog1CheckpointedOffset = "5";
CheckpointId checkpointId = CheckpointId.create();
KafkaStateCheckpointMarker kafkaStateCheckpointMarker = new KafkaStateCheckpointMarker(changelog1SSP, changelog1CheckpointedOffset);
Map<String, KafkaStateCheckpointMarker> mockCheckpointedChangelogOffset = new HashMap<String, KafkaStateCheckpointMarker>() {
{
put(store1Name, kafkaStateCheckpointMarker);
}
};
Map<SystemStreamPartition, SystemStreamPartitionMetadata> mockCurrentChangelogOffsets = ImmutableMap.of(changelog1SSP, changelog1SSPMetadata);
SystemAdmins mockSystemAdmins = mock(SystemAdmins.class);
SystemAdmin mockSystemAdmin = mock(SystemAdmin.class);
when(mockSystemAdmins.getSystemAdmin(changelog1SSP.getSystem())).thenReturn(mockSystemAdmin);
StorageManagerUtil mockStorageManagerUtil = mock(StorageManagerUtil.class);
File mockLoggedStoreBaseDir = mock(File.class);
File mockNonLoggedStoreBaseDir = mock(File.class);
Config mockConfig = mock(Config.class);
Clock mockClock = mock(Clock.class);
File mockCurrentStoreDir = mock(File.class);
File mockStoreNewerCheckpointDir = mock(File.class);
File mockStoreOlderCheckpointDir = mock(File.class);
String olderCheckpointDirLocalOffset = "3";
String newerCheckpointDirLocalOffset = "5";
when(mockStorageManagerUtil.getTaskStoreDir(eq(mockLoggedStoreBaseDir), eq(store1Name), eq(taskName), any())).thenReturn(mockCurrentStoreDir);
when(mockStorageManagerUtil.getTaskStoreCheckpointDirs(eq(mockLoggedStoreBaseDir), eq(store1Name), eq(taskName), any())).thenReturn(ImmutableList.of(mockStoreNewerCheckpointDir, mockStoreOlderCheckpointDir));
when(mockStorageManagerUtil.isLoggedStoreValid(eq(store1Name), eq(mockStoreNewerCheckpointDir), any(), eq(mockStoreChangelogs), eq(mockTaskModel), any(), eq(mockStoreEngines))).thenReturn(true);
when(mockStorageManagerUtil.isLoggedStoreValid(eq(store1Name), eq(mockStoreOlderCheckpointDir), any(), eq(mockStoreChangelogs), eq(mockTaskModel), any(), eq(mockStoreEngines))).thenReturn(true);
Set<SystemStreamPartition> mockChangelogSSPs = ImmutableSet.of(changelog1SSP);
when(mockStorageManagerUtil.readOffsetFile(eq(mockStoreNewerCheckpointDir), eq(mockChangelogSSPs), eq(false))).thenReturn(ImmutableMap.of(changelog1SSP, newerCheckpointDirLocalOffset));
when(mockStorageManagerUtil.readOffsetFile(eq(mockStoreOlderCheckpointDir), eq(mockChangelogSSPs), eq(false))).thenReturn(ImmutableMap.of(changelog1SSP, olderCheckpointDirLocalOffset)); // less than checkpointed offset (5)
Mockito.when(mockSystemAdmin.offsetComparator(anyString(), anyString())).thenAnswer((Answer<Integer>) invocation -> {
String offset1 = (String) invocation.getArguments()[0];
String offset2 = (String) invocation.getArguments()[1];
return Long.valueOf(offset1).compareTo(Long.valueOf(offset2));
});
StoreActions storeActions = TransactionalStateTaskRestoreManager.getStoreActions(mockTaskModel, mockStoreEngines, mockStoreChangelogs, mockCheckpointedChangelogOffset, checkpointId, mockCurrentChangelogOffsets, mockSystemAdmins, mockStorageManagerUtil, mockLoggedStoreBaseDir, mockNonLoggedStoreBaseDir, mockConfig, mockClock);
// ensure that all the store dirs (current or checkpoint) are marked for deletion
assertEquals(3, storeActions.storeDirsToDelete.get(store1Name).size());
assertTrue(storeActions.storeDirsToDelete.get(store1Name).contains(mockCurrentStoreDir));
assertTrue(storeActions.storeDirsToDelete.get(store1Name).contains(mockStoreOlderCheckpointDir));
assertTrue(storeActions.storeDirsToDelete.get(store1Name).contains(mockStoreNewerCheckpointDir));
// ensure that no directories are retained
assertEquals(0, storeActions.storeDirsToRetain.size());
// ensure that we mark the store for full restore (from current oldest to current newest)
assertEquals("11", storeActions.storesToRestore.get(store1Name).startingOffset);
assertEquals("20", storeActions.storesToRestore.get(store1Name).endingOffset);
}
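The assertions encode a simple invariant: once the checkpointed changelog offset (5) falls below the oldest offset still available (11), no local checkpoint directory can be trusted, so every store directory is deleted and the store is restored over the full range from oldest to newest. A sketch of that decision rule (hypothetical helper, not the actual TransactionalStateTaskRestoreManager code):
public class FullRestoreSketch {
  // If the checkpointed offset predates the oldest offset the changelog still
  // retains (e.g. after compaction or retention), local state is unusable and
  // a full restore from oldest to newest is required.
  static boolean needsFullRestore(long checkpointedOffset, long oldestAvailable) {
    return checkpointedOffset < oldestAvailable;
  }

  public static void main(String[] args) {
    System.out.println(needsFullRestore(5, 11)); // true: restore offsets 11..20
  }
}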