Use of co.cask.cdap.data.stream.MultiLiveStreamFileReader in project cdap by caskdata.
The class AbstractStreamFileConsumerFactory, method createReader.
/**
 * Creates and initializes a {@link MultiLiveStreamFileReader} for the given stream.
 *
 * <p>When the consumer state carries offsets and every one of them is both unexpired (relative to the stream
 * TTL) and of the current stream generation, the reader resumes from those offsets. Otherwise the reader starts
 * at the smallest non-expired partition start time found under the current generation location, or, if none
 * is smaller, at the partition start time computed for one partition duration before the current time.
 *
 * @param streamConfig configuration of the stream to read
 * @param consumerState consumer state whose stored offsets are reused when all of them are still usable
 * @return a reader on which {@code initialize()} has already been called
 * @throws IOException if an I/O error is raised by any of the location or reader operations invoked here
 */
private MultiLiveStreamFileReader createReader(final StreamConfig streamConfig,
                                               StreamConsumerState consumerState) throws IOException {
  Location baseLocation = streamConfig.getLocation();
  Preconditions.checkNotNull(baseLocation, "Stream location is null for %s", streamConfig.getStreamId());

  // Resolve the location of the latest stream generation.
  final int generation = StreamUtils.getGeneration(streamConfig);
  Location generationLocation = StreamUtils.createGenerationLocation(baseLocation, generation);
  final long now = System.currentTimeMillis();

  if (!Iterables.isEmpty(consumerState.getState())) {
    // Stored offsets are only trusted if none of them is expired or belongs to another generation.
    boolean allOffsetsUsable = true;
    for (StreamFileOffset offset : consumerState.getState()) {
      boolean expired = offset.getPartitionEnd() < now - streamConfig.getTTL();
      boolean generationMatches = offset.getGeneration() == generation;
      if (expired || !generationMatches) {
        allOffsetsUsable = false;
        break;
      }
    }

    if (allOffsetsUsable) {
      LOG.info("Create file reader with consumer state: {}", consumerState);
      // Existing offsets are valid, so simply resume from them.
      MultiLiveStreamFileReader resumedReader =
        new MultiLiveStreamFileReader(streamConfig, consumerState.getState());
      resumedReader.initialize();
      return resumedReader;
    }
  }

  // TODO: Support starting from some time rather than from the beginning.
  // No usable state: look for the partition with the smallest start time.
  // The default is one partition earlier than the current time, so that no event is lost if events
  // start flowing in at about the same time and no partition exists yet for the stream.
  long startTime = StreamUtils.getPartitionStartTime(now - streamConfig.getPartitionDuration(),
                                                     streamConfig.getPartitionDuration());
  long earliestNonExpiredTime = StreamUtils.getPartitionStartTime(now - streamConfig.getTTL(),
                                                                  streamConfig.getPartitionDuration());

  for (Location candidate : generationLocation.list()) {
    // Only directories whose name passes the partition check are considered.
    if (candidate.isDirectory() && StreamUtils.isPartition(candidate.getName())) {
      long candidateStart = StreamUtils.getPartitionStartTime(candidate.getName());
      // Expired partitions never move the start time.
      if (candidateStart >= earliestNonExpiredTime) {
        startTime = Math.min(startTime, candidateStart);
      }
    }
  }

  // Build the file offsets for the chosen partition.
  // TODO: Be able to support dynamic name of stream writer instances.
  //       Maybe it's done through MultiLiveStreamHandler to alter list of file offsets dynamically
  Location startPartition = StreamUtils.createPartitionLocation(generationLocation, startTime,
                                                                streamConfig.getPartitionDuration());
  List<StreamFileOffset> initialOffsets = Lists.newArrayList();
  getFileOffsets(startPartition, initialOffsets, generation);

  LOG.info("Empty consumer state. Create file reader with file offsets: groupId={}, instanceId={} states={}",
           consumerState.getGroupId(), consumerState.getInstanceId(), initialOffsets);

  MultiLiveStreamFileReader freshReader = new MultiLiveStreamFileReader(streamConfig, initialOffsets);
  freshReader.initialize();
  return freshReader;
}
Use of co.cask.cdap.data.stream.MultiLiveStreamFileReader in project cdap by caskdata.
The class StreamFetchHandler, method createReader.
/**
 * Creates a {@link FileReader} that starts reading stream events from the partition resolved for the
 * given start time.
 *
 * <p>One {@link StreamFileOffset} at position 0 is created per configured writer instance, each pointing at
 * the event file with sequence 0 in the start partition. If no start partition can be resolved, the reader
 * returned by {@code createEmptyReader()} is used instead.
 *
 * @param streamConfig configuration of the stream to read
 * @param startTime time used to locate the partition to start reading from
 * @return an initialized reader over the start partition, or the empty reader if there is no such partition
 * @throws IOException if an I/O error is raised by any of the location or reader operations invoked here
 */
private FileReader<StreamEventOffset, Iterable<StreamFileOffset>> createReader(StreamConfig streamConfig,
                                                                               long startTime) throws IOException {
  int generation = StreamUtils.getGeneration(streamConfig);
  Location partition = getStartPartitionLocation(streamConfig, startTime, generation);
  if (partition == null) {
    // Nothing to read from.
    return createEmptyReader();
  }

  List<StreamFileOffset> offsets = Lists.newArrayList();
  int writerCount = cConf.getInt(Constants.Stream.CONTAINER_INSTANCES);
  String basePrefix = cConf.get(Constants.Stream.FILE_PREFIX);

  for (int writerId = 0; writerId < writerCount; writerId++) {
    // The actual file prefix is the prefix from cConf followed by '.' and the writer instance id.
    offsets.add(new StreamFileOffset(
      StreamUtils.createStreamLocation(partition, basePrefix + "." + writerId, 0, StreamFileType.EVENT),
      0, generation));
  }

  MultiLiveStreamFileReader liveReader = new MultiLiveStreamFileReader(streamConfig, offsets);
  liveReader.initialize();
  return liveReader;
}
Aggregations