Search in sources :

Example 21 with SystemStreamPartitionMetadata

use of org.apache.samza.system.SystemStreamMetadata.SystemStreamPartitionMetadata in project samza by apache.

the class CoordinatorStreamSystemConsumer method register.

/**
 * Retrieves the oldest offset in the coordinator stream, and registers the
 * coordinator stream with the SystemConsumer using the earliest offset.
 * Does nothing (other than logging) when {@code isStarted} is already set.
 *
 * @throws SamzaException if the system admin returns a null metadata map, if the map has no
 *                        entry for the coordinator stream, or if that entry has no metadata
 *                        for the coordinator stream partition
 */
public void register() {
    if (isStarted) {
        log.info("Coordinator stream partition {} has already been registered. Skipping.", coordinatorSystemStreamPartition);
        return;
    }
    log.debug("Attempting to register: {}", coordinatorSystemStreamPartition);
    String streamName = coordinatorSystemStreamPartition.getStream();
    Set<String> streamNames = new HashSet<String>();
    streamNames.add(streamName);
    Map<String, SystemStreamMetadata> systemStreamMetadataMap = systemAdmin.getSystemStreamMetadata(streamNames);
    // Validate before logging: dereferencing the map first would turn a null result into a bare
    // NullPointerException and make the descriptive SamzaException below unreachable.
    if (systemStreamMetadataMap == null) {
        throw new SamzaException("Received a null systemStreamMetadataMap from the systemAdmin. This is illegal.");
    }
    log.info("Got metadata {}", systemStreamMetadataMap);
    SystemStreamMetadata systemStreamMetadata = systemStreamMetadataMap.get(streamName);
    if (systemStreamMetadata == null) {
        throw new SamzaException("Expected " + streamName + " to be in system stream metadata.");
    }
    SystemStreamPartitionMetadata systemStreamPartitionMetadata = systemStreamMetadata.getSystemStreamPartitionMetadata().get(coordinatorSystemStreamPartition.getPartition());
    if (systemStreamPartitionMetadata == null) {
        throw new SamzaException("Expected metadata for " + coordinatorSystemStreamPartition + " to exist.");
    }
    // Start from the oldest available offset of the coordinator stream partition.
    String startingOffset = systemStreamPartitionMetadata.getOldestOffset();
    log.debug("Registering {} with offset {}", coordinatorSystemStreamPartition, startingOffset);
    systemConsumer.register(coordinatorSystemStreamPartition, startingOffset);
}
Also used : SystemStreamMetadata(org.apache.samza.system.SystemStreamMetadata) SystemStreamPartitionMetadata(org.apache.samza.system.SystemStreamMetadata.SystemStreamPartitionMetadata) SamzaException(org.apache.samza.SamzaException) HashSet(java.util.HashSet) LinkedHashSet(java.util.LinkedHashSet)

Example 22 with SystemStreamPartitionMetadata

use of org.apache.samza.system.SystemStreamMetadata.SystemStreamPartitionMetadata in project samza by apache.

the class CoordinatorStreamStore method registerConsumer.

/**
 * <p>
 *   Looks up the metadata of the coordinator stream partition through the system admin and
 *   registers that partition with the system consumer, starting at its oldest offset.
 * </p>
 * <p>
 *   Fails the {@code Preconditions.checkNotNull} checks when either the stream metadata or
 *   the partition metadata is missing.
 * </p>
 */
private void registerConsumer() {
    LOG.debug("Attempting to register system stream partition: {}", coordinatorSystemStreamPartition);
    final String coordinatorStream = coordinatorSystemStreamPartition.getStream();

    // Stream-level metadata, keyed by stream name in the admin's response.
    final SystemStreamMetadata streamMetadata =
        systemAdmin.getSystemStreamMetadata(Sets.newHashSet(coordinatorStream)).get(coordinatorStream);
    Preconditions.checkNotNull(streamMetadata,
        String.format("System stream metadata does not exist for stream: %s.", coordinatorStream));

    // Partition-level metadata for the coordinator stream partition.
    final SystemStreamPartitionMetadata partitionMetadata =
        streamMetadata.getSystemStreamPartitionMetadata().get(coordinatorSystemStreamPartition.getPartition());
    Preconditions.checkNotNull(partitionMetadata,
        String.format("System stream partition metadata does not exist for: %s.", coordinatorSystemStreamPartition));

    final String oldestOffset = partitionMetadata.getOldestOffset();
    LOG.info("Registering system stream partition: {} with offset: {}.", coordinatorSystemStreamPartition, oldestOffset);
    systemConsumer.register(coordinatorSystemStreamPartition, oldestOffset);
}
Also used : SystemStreamMetadata(org.apache.samza.system.SystemStreamMetadata) SystemStreamPartitionMetadata(org.apache.samza.system.SystemStreamMetadata.SystemStreamPartitionMetadata)

Example 23 with SystemStreamPartitionMetadata

use of org.apache.samza.system.SystemStreamMetadata.SystemStreamPartitionMetadata in project samza by apache.

the class TransactionalStateTaskRestoreManager method registerStartingOffsets.

/**
 * Registers a starting offset with the system consumer of every store changelog SSP.
 *
 * <p>Runs in two passes. The first pass registers the upcoming offset of each changelog SSP as a
 * placeholder. The second pass, only for the stores listed in {@code storeActions.storesToRestore},
 * registers the offset that restoration should actually begin from.
 *
 * @param taskModel supplies the changelog partition used to build each changelog SSP
 * @param storeActions holds the {@code storesToRestore} map of store name to restore offsets
 * @param storeChangelogs store name to changelog system stream
 * @param systemAdmins used to look up the admin of each changelog's system
 * @param storeConsumers store name to the system consumer that reads its changelog
 * @param currentChangelogOffsets changelog SSP to its current partition metadata
 */
@VisibleForTesting
static void registerStartingOffsets(TaskModel taskModel, StoreActions storeActions, Map<String, SystemStream> storeChangelogs, SystemAdmins systemAdmins, Map<String, SystemConsumer> storeConsumers, Map<SystemStreamPartition, SystemStreamPartitionMetadata> currentChangelogOffsets) {
    Map<String, RestoreOffsets> storesToRestore = storeActions.storesToRestore;

    // Pass 1: every changelog system consumer needs at least one registered SSP, otherwise start
    // will throw. Register the upcoming offset as a placeholder so that nothing is replayed by
    // default; pass 2 overrides it where a restore is required.
    storeChangelogs.forEach((store, changelogStream) -> {
        SystemStreamPartition ssp = new SystemStreamPartition(changelogStream, taskModel.getChangelogPartition());
        SystemConsumer consumer = storeConsumers.get(store);
        String placeholderOffset = currentChangelogOffsets.get(ssp).getUpcomingOffset();
        LOG.info("Temporarily registering upcoming offset: {} as the starting offest for changelog ssp: {}. "
            + "This might be overridden later for stores that need restoring.", placeholderOffset, ssp);
        consumer.register(ssp, placeholderOffset);
    });

    // Pass 2: register the real starting offset. The system consumer will ensure that the lower
    // of the two registered offsets is used as the starting offset.
    storesToRestore.forEach((store, offsets) -> {
        SystemStream changelogStream = storeChangelogs.get(store);
        SystemStreamPartition ssp = new SystemStreamPartition(changelogStream, taskModel.getChangelogPartition());
        SystemAdmin admin = systemAdmins.getSystemAdmin(changelogStream.getSystem());
        validateRestoreOffsets(offsets, admin);

        SystemConsumer consumer = storeConsumers.get(store);
        String oldest = currentChangelogOffsets.get(ssp).getOldestOffset();

        // A restore offset equal to the oldest offset (e.g. a full restore) is inclusive, so start
        // right there. Otherwise start from the offset that follows the restore offset.
        String resumeOffset = admin.offsetComparator(offsets.startingOffset, oldest) == 0
            ? oldest
            : admin.getOffsetsAfter(ImmutableMap.of(ssp, offsets.startingOffset)).get(ssp);

        LOG.info("Registering starting offset: {} for changelog ssp: {}", resumeOffset, ssp);
        consumer.register(ssp, resumeOffset);
    });
}
Also used : SystemConsumer(org.apache.samza.system.SystemConsumer) SystemStream(org.apache.samza.system.SystemStream) SystemStreamPartitionMetadata(org.apache.samza.system.SystemStreamMetadata.SystemStreamPartitionMetadata) SystemAdmin(org.apache.samza.system.SystemAdmin) SystemStreamPartition(org.apache.samza.system.SystemStreamPartition) VisibleForTesting(com.google.common.annotations.VisibleForTesting)

Example 24 with SystemStreamPartitionMetadata

use of org.apache.samza.system.SystemStreamMetadata.SystemStreamPartitionMetadata in project samza by apache.

the class DirectoryPartitioner method getPartitionMetadataMap.

/**
 * Builds the partition metadata for a stream by grouping its filtered files into partitions.
 * Partition ids are assigned sequentially from 0 in the order the groups are produced, and the
 * file paths of each group are recorded in {@code partitionDescriptorMap} under the stream name.
 *
 * @param streamName name of the stream; should contain the information about the path of the
 *                   root directory
 * @param existingPartitionDescriptorMap map of the existing partition descriptor; when non-empty,
 *                   the filtered files are passed through
 *                   {@code validateAndGetOriginalFilteredFiles} against it
 * @return map of SSP metadata, keyed by partition
 */
public Map<Partition, SystemStreamPartitionMetadata> getPartitionMetadataMap(String streamName, @Nullable Map<Partition, List<String>> existingPartitionDescriptorMap) {
    final boolean hasExistingDescriptors = !MapUtils.isEmpty(existingPartitionDescriptorMap);
    LOG.info("Trying to obtain metadata for " + streamName);
    LOG.info("Existing partition descriptor: " + (hasExistingDescriptors ? existingPartitionDescriptorMap : "empty"));

    Map<Partition, SystemStreamPartitionMetadata> metadataByPartition = new HashMap<>();
    partitionDescriptorMap.putIfAbsent(streamName, new HashMap<>());

    List<FileMetadata> candidateFiles = getFilteredFiles(streamName);
    if (hasExistingDescriptors) {
        candidateFiles = validateAndGetOriginalFilteredFiles(candidateFiles, existingPartitionDescriptorMap);
    }

    int nextPartitionId = 0;
    for (List<FileMetadata> group : generatePartitionGroups(candidateFiles)) {
        Partition partition = new Partition(nextPartitionId);

        // Collect the path and the length (as a string) of every file in this group.
        List<String> paths = new ArrayList<>();
        List<String> lengths = new ArrayList<>();
        for (FileMetadata file : group) {
            paths.add(file.getPath());
            lengths.add(String.valueOf(file.getLen()));
        }

        // Oldest offset is generated from (0, "0"); newest from the index of the last file in
        // the group together with that file's length.
        int lastFileIndex = lengths.size() - 1;
        String oldestOffset = MultiFileHdfsReader.generateOffset(0, "0");
        String newestOffset = MultiFileHdfsReader.generateOffset(lastFileIndex, lengths.get(lastFileIndex));

        metadataByPartition.put(partition, new SystemStreamPartitionMetadata(oldestOffset, newestOffset, null));
        partitionDescriptorMap.get(streamName).put(partition, paths);
        nextPartitionId++;
    }

    LOG.info("Obtained metadata map as: " + metadataByPartition);
    LOG.info("Computed partition description as: " + partitionDescriptorMap);
    return metadataByPartition;
}
Also used : Partition(org.apache.samza.Partition) HashMap(java.util.HashMap) FileMetadata(org.apache.samza.system.hdfs.partitioner.FileSystemAdapter.FileMetadata) ArrayList(java.util.ArrayList) SystemStreamPartitionMetadata(org.apache.samza.system.SystemStreamMetadata.SystemStreamPartitionMetadata) ArrayList(java.util.ArrayList) List(java.util.List)

Example 25 with SystemStreamPartitionMetadata

use of org.apache.samza.system.SystemStreamMetadata.SystemStreamPartitionMetadata in project samza by apache.

the class TestDirectoryPartitioner method testBasicBlackListFiltering.

// Checks that files matching the blacklist pattern ("delta-*.avro") are left out: of the nine
// input files only the six "part-*" files are expected, each in its own partition.
@Test
public void testBasicBlackListFiltering() {
    String[] inputFiles = { "part-001.avro", "part-002.avro", "part-003.avro", "delta-01.avro", "part-005.avro", "delta-03.avro", "part-004.avro", "delta-02.avro", "part-006.avro" };
    long[] fileLength = { 150582, 138132, 214005, 205738, 158273, 982345, 313245, 234212, 413232 };

    List<FileMetadata> files = new ArrayList<>();
    for (int idx = 0; idx < inputFiles.length; idx++) {
        files.add(new FileMetadata(inputFiles[idx], fileLength[idx]));
    }

    // Whitelist everything, blacklist the delta files, no grouping pattern.
    DirectoryPartitioner partitioner = new DirectoryPartitioner(".*", "delta-.*\\.avro", "", new TestFileSystemAdapter(files));

    // Indices into inputFiles of the files expected in each partition.
    int[][] expectedPartitioning = { { 0 }, { 1 }, { 2 }, { 4 }, { 6 }, { 8 } };

    Map<Partition, SystemStreamPartitionMetadata> metadataByPartition = partitioner.getPartitionMetadataMap("hdfs", null);
    Assert.assertEquals(6, metadataByPartition.size());

    Map<Partition, List<String>> descriptorByPartition = partitioner.getPartitionDescriptor("hdfs");
    verifyPartitionDescriptor(inputFiles, expectedPartitioning, 6, descriptorByPartition);
}
Also used : Partition(org.apache.samza.Partition) FileMetadata(org.apache.samza.system.hdfs.partitioner.FileSystemAdapter.FileMetadata) ArrayList(java.util.ArrayList) SystemStreamPartitionMetadata(org.apache.samza.system.SystemStreamMetadata.SystemStreamPartitionMetadata) List(java.util.List) ArrayList(java.util.ArrayList) Test(org.junit.Test)

Aggregations

SystemStreamPartitionMetadata (org.apache.samza.system.SystemStreamMetadata.SystemStreamPartitionMetadata)45 Partition (org.apache.samza.Partition)42 Test (org.junit.Test)37 SystemStreamPartition (org.apache.samza.system.SystemStreamPartition)35 HashMap (java.util.HashMap)33 SystemStream (org.apache.samza.system.SystemStream)32 TaskName (org.apache.samza.container.TaskName)30 SystemAdmin (org.apache.samza.system.SystemAdmin)29 SystemAdmins (org.apache.samza.system.SystemAdmins)29 KafkaStateCheckpointMarker (org.apache.samza.checkpoint.kafka.KafkaStateCheckpointMarker)26 Map (java.util.Map)25 TaskModel (org.apache.samza.job.model.TaskModel)25 ImmutableMap (com.google.common.collect.ImmutableMap)24 File (java.io.File)24 Config (org.apache.samza.config.Config)24 TaskConfig (org.apache.samza.config.TaskConfig)24 SSPMetadataCache (org.apache.samza.system.SSPMetadataCache)24 SystemConsumer (org.apache.samza.system.SystemConsumer)24 Clock (org.apache.samza.util.Clock)24 Matchers.anyString (org.mockito.Matchers.anyString)24