use of org.apache.samza.system.SystemStreamMetadata.SystemStreamPartitionMetadata in project samza by apache.
the class CoordinatorStreamSystemConsumer method register.
/**
 * Retrieves the oldest offset in the coordinator stream, and registers the
 * coordinator stream with the SystemConsumer using the earliest offset.
 *
 * <p>Returns immediately (after logging) when {@code isStarted} is already set.
 *
 * @throws SamzaException if the system admin returns a null metadata map, or if that map has no
 *         entry for the coordinator stream or for the coordinator stream's partition
 */
public void register() {
  if (isStarted) {
    log.info("Coordinator stream partition {} has already been registered. Skipping.", coordinatorSystemStreamPartition);
    return;
  }
  log.debug("Attempting to register: {}", coordinatorSystemStreamPartition);

  String streamName = coordinatorSystemStreamPartition.getStream();
  Set<String> streamNames = new HashSet<>();
  streamNames.add(streamName);
  Map<String, SystemStreamMetadata> systemStreamMetadataMap = systemAdmin.getSystemStreamMetadata(streamNames);

  // Validate before logging: dereferencing the map first would raise a bare NullPointerException
  // and make this explicit, more descriptive failure unreachable.
  if (systemStreamMetadataMap == null) {
    throw new SamzaException("Received a null systemStreamMetadataMap from the systemAdmin. This is illegal.");
  }
  log.info("Got metadata {}", systemStreamMetadataMap);

  SystemStreamMetadata systemStreamMetadata = systemStreamMetadataMap.get(streamName);
  if (systemStreamMetadata == null) {
    throw new SamzaException("Expected " + streamName + " to be in system stream metadata.");
  }

  SystemStreamPartitionMetadata systemStreamPartitionMetadata = systemStreamMetadata.getSystemStreamPartitionMetadata().get(coordinatorSystemStreamPartition.getPartition());
  if (systemStreamPartitionMetadata == null) {
    throw new SamzaException("Expected metadata for " + coordinatorSystemStreamPartition + " to exist.");
  }

  // Start from the oldest available offset of the coordinator stream partition.
  String startingOffset = systemStreamPartitionMetadata.getOldestOffset();
  log.debug("Registering {} with offset {}", coordinatorSystemStreamPartition, startingOffset);
  systemConsumer.register(coordinatorSystemStreamPartition, startingOffset);
}
use of org.apache.samza.system.SystemStreamMetadata.SystemStreamPartitionMetadata in project samza by apache.
the class CoordinatorStreamStore method registerConsumer.
/**
 * <p>
 * Looks up the metadata of the coordinator stream through the system admin and registers the
 * coordinator system stream partition with the system consumer, starting from the oldest offset
 * reported for that partition.
 * </p>
 * <p>
 * Both the stream metadata and the partition metadata must be present; each is verified with
 * {@code Preconditions.checkNotNull} before it is used.
 * </p>
 */
private void registerConsumer() {
  LOG.debug("Attempting to register system stream partition: {}", coordinatorSystemStreamPartition);

  final String coordinatorStream = coordinatorSystemStreamPartition.getStream();

  // Stream-level metadata, keyed by stream name in the admin's response.
  final SystemStreamMetadata streamMetadata =
      systemAdmin.getSystemStreamMetadata(Sets.newHashSet(coordinatorStream)).get(coordinatorStream);
  Preconditions.checkNotNull(streamMetadata,
      String.format("System stream metadata does not exist for stream: %s.", coordinatorStream));

  // Partition-level metadata for the coordinator stream's partition.
  final SystemStreamPartitionMetadata partitionMetadata =
      streamMetadata.getSystemStreamPartitionMetadata().get(coordinatorSystemStreamPartition.getPartition());
  Preconditions.checkNotNull(partitionMetadata,
      String.format("System stream partition metadata does not exist for: %s.", coordinatorSystemStreamPartition));

  final String oldestOffset = partitionMetadata.getOldestOffset();
  LOG.info("Registering system stream partition: {} with offset: {}.", coordinatorSystemStreamPartition, oldestOffset);
  systemConsumer.register(coordinatorSystemStreamPartition, oldestOffset);
}
use of org.apache.samza.system.SystemStreamMetadata.SystemStreamPartitionMetadata in project samza by apache.
the class TransactionalStateTaskRestoreManager method registerStartingOffsets.
/**
 * Registers starting offsets for the store changelog SSPs with their respective SystemConsumers.
 *
 * <p>Works in two passes. First, every changelog SSP is registered at its upcoming offset.
 * Second, for each store listed in {@code storeActions.storesToRestore}, the offset to actually
 * restore from is registered on top of that.
 *
 * @param taskModel supplies the changelog partition used to build each changelog SSP
 * @param storeActions supplies the stores to restore along with their restore offsets
 * @param storeChangelogs store name to changelog system stream
 * @param systemAdmins used to look up the SystemAdmin of each changelog's system
 * @param storeConsumers store name to the SystemConsumer of its changelog
 * @param currentChangelogOffsets changelog SSP to its current oldest / upcoming offsets
 */
@VisibleForTesting
static void registerStartingOffsets(TaskModel taskModel, StoreActions storeActions, Map<String, SystemStream> storeChangelogs, SystemAdmins systemAdmins, Map<String, SystemConsumer> storeConsumers, Map<SystemStreamPartition, SystemStreamPartitionMetadata> currentChangelogOffsets) {
  Map<String, RestoreOffsets> storesToRestore = storeActions.storesToRestore;

  // Pass 1: each changelog system consumer needs at least one registered SSP, otherwise start
  // will throw. Register the upcoming offset as a placeholder; it may be overridden in pass 2.
  // Choosing the upcoming offset means nothing is replayed by default.
  for (Map.Entry<String, SystemStream> changelogEntry : storeChangelogs.entrySet()) {
    SystemStreamPartition ssp = new SystemStreamPartition(changelogEntry.getValue(), taskModel.getChangelogPartition());
    SystemConsumer consumer = storeConsumers.get(changelogEntry.getKey());
    String upcomingOffset = currentChangelogOffsets.get(ssp).getUpcomingOffset();
    LOG.info("Temporarily registering upcoming offset: {} as the starting offest for changelog ssp: {}. " + "This might be overridden later for stores that need restoring.", upcomingOffset, ssp);
    consumer.register(ssp, upcomingOffset);
  }

  // Pass 2: register the real starting offset for stores that need restoring. The system
  // consumer is relied upon to use the lower of the two registered offsets.
  for (Map.Entry<String, RestoreOffsets> restoreEntry : storesToRestore.entrySet()) {
    String storeName = restoreEntry.getKey();
    RestoreOffsets restoreOffsets = restoreEntry.getValue();

    SystemStream changelog = storeChangelogs.get(storeName);
    SystemStreamPartition ssp = new SystemStreamPartition(changelog, taskModel.getChangelogPartition());
    SystemAdmin admin = systemAdmins.getSystemAdmin(changelog.getSystem());
    validateRestoreOffsets(restoreOffsets, admin);

    SystemConsumer consumer = storeConsumers.get(storeName);
    String oldestOffset = currentChangelogOffsets.get(ssp).getOldestOffset();

    // A restore starting offset equal to the oldest offset (e.g. a full restore) is inclusive:
    // start right at the oldest offset. Otherwise start from the offset following it.
    boolean restoreFromOldest = admin.offsetComparator(restoreOffsets.startingOffset, oldestOffset) == 0;
    String startingOffset = restoreFromOldest
        ? oldestOffset
        : admin.getOffsetsAfter(ImmutableMap.of(ssp, restoreOffsets.startingOffset)).get(ssp);

    LOG.info("Registering starting offset: {} for changelog ssp: {}", startingOffset, ssp);
    consumer.register(ssp, startingOffset);
  }
}
use of org.apache.samza.system.SystemStreamMetadata.SystemStreamPartitionMetadata in project samza by apache.
the class DirectoryPartitioner method getPartitionMetadataMap.
/**
 * Builds the partition metadata for a stream from its filtered, grouped files.
 *
 * <p>Each file group becomes one partition, numbered from 0 in group order. As a side effect, the
 * list of file paths of every partition is recorded in {@code partitionDescriptorMap} under
 * {@code streamName}.
 *
 * @param streamName name of the stream; should contain the information about the path of the
 *                   root directory
 * @param existingPartitionDescriptorMap map of the existing partition descriptor; when non-empty,
 *                   the filtered files are validated against it before grouping
 * @return map of SSP metadata, keyed by partition
 */
public Map<Partition, SystemStreamPartitionMetadata> getPartitionMetadataMap(String streamName, @Nullable Map<Partition, List<String>> existingPartitionDescriptorMap) {
  boolean hasExistingDescriptors = !MapUtils.isEmpty(existingPartitionDescriptorMap);
  LOG.info("Trying to obtain metadata for " + streamName);
  LOG.info("Existing partition descriptor: " + (hasExistingDescriptors ? existingPartitionDescriptorMap : "empty"));

  Map<Partition, SystemStreamPartitionMetadata> metadataByPartition = new HashMap<>();
  partitionDescriptorMap.putIfAbsent(streamName, new HashMap<>());

  List<FileMetadata> candidateFiles = getFilteredFiles(streamName);
  if (hasExistingDescriptors) {
    candidateFiles = validateAndGetOriginalFilteredFiles(candidateFiles, existingPartitionDescriptorMap);
  }

  int nextPartitionId = 0;
  for (List<FileMetadata> group : generatePartitionGroups(candidateFiles)) {
    Partition partition = new Partition(nextPartitionId++);

    // Collect the path and the length (as a string) of every file in this group, in order.
    List<String> paths = new ArrayList<>();
    List<String> lengths = new ArrayList<>();
    for (FileMetadata file : group) {
      paths.add(file.getPath());
      lengths.add(String.valueOf(file.getLen()));
    }

    // Oldest offset: first file, position "0". Newest offset: last file, positioned at its length.
    int lastFileIndex = lengths.size() - 1;
    String oldestOffset = MultiFileHdfsReader.generateOffset(0, "0");
    String newestOffset = MultiFileHdfsReader.generateOffset(lastFileIndex, lengths.get(lastFileIndex));

    // The third constructor argument is deliberately left null, as in the original code.
    metadataByPartition.put(partition, new SystemStreamPartitionMetadata(oldestOffset, newestOffset, null));
    partitionDescriptorMap.get(streamName).put(partition, paths);
  }

  LOG.info("Obtained metadata map as: " + metadataByPartition);
  LOG.info("Computed partition description as: " + partitionDescriptorMap);
  return metadataByPartition;
}
use of org.apache.samza.system.SystemStreamMetadata.SystemStreamPartitionMetadata in project samza by apache.
the class TestDirectoryPartitioner method testBasicBlackListFiltering.
/**
 * With a match-all white list and a black list of {@code delta-.*\.avro}, the three delta files
 * must be dropped and, with an empty group pattern, each of the six remaining files is expected
 * to end up in its own partition.
 */
@Test
public void testBasicBlackListFiltering() {
  String[] inputFiles = { "part-001.avro", "part-002.avro", "part-003.avro", "delta-01.avro", "part-005.avro", "delta-03.avro", "part-004.avro", "delta-02.avro", "part-006.avro" };
  long[] fileLength = { 150582, 138132, 214005, 205738, 158273, 982345, 313245, 234212, 413232 };

  List<FileMetadata> files = new ArrayList<>();
  for (int idx = 0; idx < inputFiles.length; idx++) {
    files.add(new FileMetadata(inputFiles[idx], fileLength[idx]));
  }

  String whiteList = ".*";
  String blackList = "delta-.*\\.avro";
  String groupPattern = "";
  String streamName = "hdfs";

  // Indices into inputFiles of the files expected in each partition (delta files excluded).
  int[][] expectedPartitioning = { { 0 }, { 1 }, { 2 }, { 4 }, { 6 }, { 8 } };
  int expectedNumPartition = 6;

  DirectoryPartitioner partitioner = new DirectoryPartitioner(whiteList, blackList, groupPattern, new TestFileSystemAdapter(files));
  Map<Partition, SystemStreamPartitionMetadata> partitionMetadata = partitioner.getPartitionMetadataMap(streamName, null);
  Assert.assertEquals(expectedNumPartition, partitionMetadata.size());

  Map<Partition, List<String>> partitionDescriptors = partitioner.getPartitionDescriptor(streamName);
  verifyPartitionDescriptor(inputFiles, expectedPartitioning, expectedNumPartition, partitionDescriptors);
}
Aggregations