Search in sources :

Example 1 with MultiLiveStreamFileReader

use of co.cask.cdap.data.stream.MultiLiveStreamFileReader in project cdap by caskdata.

The method createReader of the class AbstractStreamFileConsumerFactory.

/**
 * Builds and initializes a {@link MultiLiveStreamFileReader} for the given stream.
 *
 * <p>If the consumer state holds offsets, and every one of them belongs to the current stream
 * generation and has a partition end no older than the stream TTL, the reader is created from those
 * stored offsets. Otherwise the reader is created from file offsets gathered for the partition with
 * the smallest non-expired start time under the current generation location.
 *
 * @param streamConfig configuration of the stream to read from; its location must not be null
 * @param consumerState consumer state whose stored offsets are reused when they are all still valid
 * @return an initialized reader
 * @throws IOException declared by the original signature; the concrete sources are the I/O calls made
 *         here (for example listing the generation location) and are not all visible in this block
 */
private MultiLiveStreamFileReader createReader(final StreamConfig streamConfig, StreamConsumerState consumerState) throws IOException {
    Location baseLocation = streamConfig.getLocation();
    Preconditions.checkNotNull(baseLocation, "Stream location is null for %s", streamConfig.getStreamId());

    // Everything below works against the location of the latest stream generation.
    final int generation = StreamUtils.getGeneration(streamConfig);
    Location generationLocation = StreamUtils.createGenerationLocation(baseLocation, generation);
    final long now = System.currentTimeMillis();

    // A stored offset is reusable only if it is not expired and belongs to the current generation.
    Predicate<StreamFileOffset> reusableOffset = new Predicate<StreamFileOffset>() {

        @Override
        public boolean apply(StreamFileOffset offset) {
            boolean withinTtl = offset.getPartitionEnd() >= now - streamConfig.getTTL();
            boolean generationMatches = offset.getGeneration() == generation;
            return withinTtl && generationMatches;
        }
    };

    // Resume from the stored offsets only when there are some and all of them are reusable.
    if (!Iterables.isEmpty(consumerState.getState()) && Iterables.all(consumerState.getState(), reusableOffset)) {
        LOG.info("Create file reader with consumer state: {}", consumerState);
        MultiLiveStreamFileReader resumedReader = new MultiLiveStreamFileReader(streamConfig, consumerState.getState());
        resumedReader.initialize();
        return resumedReader;
    }

    // TODO: Support starting from some time rather than from beginning.
    // No usable stored state: look for the partition with the smallest start time.
    // The initial value is one partition duration before the current time, so that if the stream has
    // no partition yet, no event is lost when events start flowing in at about the same time.
    long startTime = StreamUtils.getPartitionStartTime(now - streamConfig.getPartitionDuration(), streamConfig.getPartitionDuration());
    long expiryCutoff = StreamUtils.getPartitionStartTime(now - streamConfig.getTTL(), streamConfig.getPartitionDuration());
    for (Location candidate : generationLocation.list()) {
        // Only directories whose name is a partition name are considered.
        if (candidate.isDirectory() && StreamUtils.isPartition(candidate.getName())) {
            long candidateStart = StreamUtils.getPartitionStartTime(candidate.getName());
            // Expired partitions (start time before the cutoff) never become the starting point.
            if (candidateStart >= expiryCutoff) {
                startTime = Math.min(startTime, candidateStart);
            }
        }
    }

    // Create file offsets for the chosen starting partition.
    // TODO: Be able to support dynamic name of stream writer instances.
    // Maybe it's done through MultiLiveStreamHandler to alter list of file offsets dynamically
    Location startPartition = StreamUtils.createPartitionLocation(generationLocation, startTime, streamConfig.getPartitionDuration());
    List<StreamFileOffset> initialOffsets = Lists.newArrayList();
    getFileOffsets(startPartition, initialOffsets, generation);
    LOG.info("Empty consumer state. Create file reader with file offsets: groupId={}, instanceId={} states={}", consumerState.getGroupId(), consumerState.getInstanceId(), initialOffsets);
    MultiLiveStreamFileReader freshReader = new MultiLiveStreamFileReader(streamConfig, initialOffsets);
    freshReader.initialize();
    return freshReader;
}
Also used : MultiLiveStreamFileReader(co.cask.cdap.data.stream.MultiLiveStreamFileReader) StreamFileOffset(co.cask.cdap.data.stream.StreamFileOffset) Location(org.apache.twill.filesystem.Location)

Example 2 with MultiLiveStreamFileReader

use of co.cask.cdap.data.stream.MultiLiveStreamFileReader in project cdap by caskdata.

The method createReader of the class StreamFetchHandler.

/**
 * Creates an initialized {@link FileReader} over the stream's event files, beginning at the
 * partition resolved for the given start time.
 *
 * <p>One file offset (sequence 0, position 0, current generation) is created per configured stream
 * container instance. If no start partition can be resolved, an empty reader is returned instead.
 *
 * @param streamConfig configuration of the stream to read
 * @param startTime time used to resolve the partition at which reading starts
 * @return a reader over the event files of the start partition, or an empty reader if there is none
 * @throws IOException declared by the original signature; the concrete I/O sources are not all
 *         visible in this block
 */
private FileReader<StreamEventOffset, Iterable<StreamFileOffset>> createReader(StreamConfig streamConfig, long startTime) throws IOException {
    int generation = StreamUtils.getGeneration(streamConfig);
    Location firstPartition = getStartPartitionLocation(streamConfig, startTime, generation);
    if (firstPartition == null) {
        // Nothing to read from.
        return createEmptyReader();
    }

    int writerCount = cConf.getInt(Constants.Stream.CONTAINER_INSTANCES);
    String basePrefix = cConf.get(Constants.Stream.FILE_PREFIX);
    List<StreamFileOffset> offsets = Lists.newArrayList();
    for (int writerId = 0; writerId < writerCount; writerId++) {
        // Each writer instance has its own event file, named "<configured prefix>.<writer instance id>".
        Location eventFile = StreamUtils.createStreamLocation(firstPartition, basePrefix + '.' + writerId, 0, StreamFileType.EVENT);
        offsets.add(new StreamFileOffset(eventFile, 0, generation));
    }

    MultiLiveStreamFileReader liveReader = new MultiLiveStreamFileReader(streamConfig, offsets);
    liveReader.initialize();
    return liveReader;
}
Also used : MultiLiveStreamFileReader(co.cask.cdap.data.stream.MultiLiveStreamFileReader) StreamFileOffset(co.cask.cdap.data.stream.StreamFileOffset) Location(org.apache.twill.filesystem.Location)

Aggregations

MultiLiveStreamFileReader (co.cask.cdap.data.stream.MultiLiveStreamFileReader): 2, StreamFileOffset (co.cask.cdap.data.stream.StreamFileOffset): 2, Location (org.apache.twill.filesystem.Location): 2