use of co.cask.cdap.data.stream.StreamFileOffset in project cdap by caskdata.
the class FileStreamAdmin method mutateStates.
private void mutateStates(long groupId, int instances, Set<StreamConsumerState> states, Set<StreamConsumerState> newStates, Set<StreamConsumerState> removeStates) {
  int oldInstances = states.size();
  if (oldInstances == instances) {
    // If the number of instances hasn't changed, no need to mutate any states
    return;
  }

  // Collects the smallest offsets across all existing consumers.
  // Map from event file location to file offset.
  // Use a tree map to maintain ordering consistency in the offsets.
  // Not required by any logic, just easier to look at when logged.
  Map<Location, StreamFileOffset> fileOffsets = Maps.newTreeMap(Locations.LOCATION_COMPARATOR);
  for (StreamConsumerState state : states) {
    for (StreamFileOffset fileOffset : state.getState()) {
      StreamFileOffset smallestOffset = fileOffsets.get(fileOffset.getEventLocation());
      if (smallestOffset == null || fileOffset.getOffset() < smallestOffset.getOffset()) {
        fileOffsets.put(fileOffset.getEventLocation(), new StreamFileOffset(fileOffset));
      }
    }
  }

  // Constructs the smallest offsets
  Collection<StreamFileOffset> smallestOffsets = fileOffsets.values();

  // When the group size changes, reset all existing instance states to the smallest file offsets constructed above.
  for (StreamConsumerState state : states) {
    if (state.getInstanceId() < instances) {
      // Only keep valid instances
      newStates.add(new StreamConsumerState(groupId, state.getInstanceId(), smallestOffsets));
    } else {
      removeStates.add(state);
    }
  }

  // For all new instances, set file offsets to the smallest ones constructed above.
  for (int i = oldInstances; i < instances; i++) {
    newStates.add(new StreamConsumerState(groupId, i, smallestOffsets));
  }
}
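The heart of mutateStates is the min-merge over per-consumer offsets: for every event file, the smallest offset seen across the existing consumers wins, so each surviving or newly added instance resumes no later than any previous consumer did. Below is a minimal standalone sketch of that merge, using plain strings and longs in place of Location and StreamFileOffset; the class and method names are illustrative only, not CDAP API.

import java.util.Map;
import java.util.TreeMap;

public class SmallestOffsetMerge {

  // Merge per-consumer offsets: for each file, keep the smallest offset seen.
  static Map<String, Long> mergeSmallest(Iterable<Map<String, Long>> consumerOffsets) {
    // TreeMap only for stable ordering when printed, mirroring the Locations.LOCATION_COMPARATOR usage above.
    Map<String, Long> smallest = new TreeMap<>();
    for (Map<String, Long> offsets : consumerOffsets) {
      for (Map.Entry<String, Long> entry : offsets.entrySet()) {
        smallest.merge(entry.getKey(), entry.getValue(), Math::min);
      }
    }
    return smallest;
  }

  public static void main(String[] args) {
    Map<String, Long> consumer0 = Map.of("file.0000", 4096L, "file.0001", 1024L);
    Map<String, Long> consumer1 = Map.of("file.0000", 2048L, "file.0001", 8192L);
    // Prints {file.0000=2048, file.0001=1024}
    System.out.println(mergeSmallest(java.util.List.of(consumer0, consumer1)));
  }
}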
use of co.cask.cdap.data.stream.StreamFileOffset in project cdap by caskdata.
the class StreamFetchHandler method fetch.
/**
 * Handler for the HTTP API {@code /streams/[stream_name]/events?start=[start_ts]&end=[end_ts]&limit=[event_limit]}
 * <p>
 * Responds with:
 * <ul>
 * <li>404 if the stream does not exist</li>
 * <li>204 if no event in the given start/end time range exists</li>
 * <li>200 if there are one or more events</li>
 * </ul>
 * </p>
 * <p>
 * Response body is a JSON array of StreamEvent objects.
 * </p>
 *
 * @see StreamEventTypeAdapter StreamEventTypeAdapter for the format of the StreamEvent object
 */
@GET
@Path("/{stream}/events")
public void fetch(HttpRequest request, final HttpResponder responder,
                  @PathParam("namespace-id") String namespaceId,
                  @PathParam("stream") String stream,
                  @QueryParam("start") @DefaultValue("0") String start,
                  @QueryParam("end") @DefaultValue("9223372036854775807") String end,
                  @QueryParam("limit") @DefaultValue("2147483647") final int limitEvents) throws Exception {
  long startTime = TimeMathParser.parseTime(start, TimeUnit.MILLISECONDS);
  long endTime = TimeMathParser.parseTime(end, TimeUnit.MILLISECONDS);
  StreamId streamId = new StreamId(namespaceId, stream);
  if (!verifyGetEventsRequest(streamId, startTime, endTime, limitEvents, responder)) {
    return;
  }

  // Make sure the user has READ permission on the stream, since getConfig doesn't perform that check.
  authorizationEnforcer.enforce(streamId, authenticationContext.getPrincipal(), Action.READ);

  final StreamConfig streamConfig = streamAdmin.getConfig(streamId);
  long now = System.currentTimeMillis();
  startTime = Math.max(startTime, now - streamConfig.getTTL());
  endTime = Math.min(endTime, now);
  final long streamStartTime = startTime;
  final long streamEndTime = endTime;

  impersonator.doAs(streamId, new Callable<Void>() {

    @Override
    public Void call() throws Exception {
      int limit = limitEvents;
      // Create the stream event reader
      try (FileReader<StreamEventOffset, Iterable<StreamFileOffset>> reader = createReader(streamConfig, streamStartTime)) {
        TimeRangeReadFilter readFilter = new TimeRangeReadFilter(streamStartTime, streamEndTime);
        List<StreamEvent> events = Lists.newArrayListWithCapacity(100);

        // Reads the first batch of events from the stream.
        int eventsRead = readEvents(reader, events, limit, readFilter);

        // If already empty, return 204 No Content
        if (eventsRead <= 0) {
          responder.sendStatus(HttpResponseStatus.NO_CONTENT);
          return null;
        }

        // Send a chunked response, as we don't want to buffer all events in memory to determine the content-length.
        ChunkResponder chunkResponder = responder.sendChunkStart(
          HttpResponseStatus.OK, new DefaultHttpHeaders().set(HttpHeaderNames.CONTENT_TYPE, "application/json; charset=utf-8"));

        ByteBuf buffer = Unpooled.buffer();
        JsonWriter jsonWriter = new JsonWriter(new OutputStreamWriter(new ByteBufOutputStream(buffer), StandardCharsets.UTF_8));

        // The response is an array of stream events
        jsonWriter.beginArray();
        while (limit > 0 && eventsRead > 0) {
          limit -= eventsRead;
          for (StreamEvent event : events) {
            GSON.toJson(event, StreamEvent.class, jsonWriter);
            jsonWriter.flush();

            // If the chunk size limit is exceeded, send a new chunk.
            if (buffer.readableBytes() >= CHUNK_SIZE) {
              // If the connection is closed, sendChunk will throw an IOException.
              // No need to handle the exception, as it will simply be propagated back to the netty-http library,
              // which will handle it.
              // Need to copy the buffer because it will get reused and sendChunk is an async operation.
              chunkResponder.sendChunk(buffer.copy());
              buffer.clear();
            }
          }
          events.clear();

          if (limit > 0) {
            eventsRead = readEvents(reader, events, limit, readFilter);
          }
        }
        jsonWriter.endArray();
        jsonWriter.close();

        // Send the last chunk that still has data
        if (buffer.isReadable()) {
          // No need to copy the last chunk, since the buffer will not be reused
          chunkResponder.sendChunk(buffer);
        }
        Closeables.closeQuietly(chunkResponder);
      }
      return null;
    }
  });
}
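For context, a caller typically reaches this handler through the CDAP router's REST API. The following is a minimal client sketch using Java 11's HttpClient; it assumes the standard /v3/namespaces/<namespace>/streams/<stream>/events route and a local router on port 11015 with a stream named myStream, all of which should be adjusted to your deployment.

import java.net.URI;
import java.net.http.HttpClient;
import java.net.http.HttpRequest;
import java.net.http.HttpResponse;

public class FetchStreamEvents {

  public static void main(String[] args) throws Exception {
    // Host, port, namespace and stream name are placeholders; adjust to your CDAP router.
    String url = "http://localhost:11015/v3/namespaces/default/streams/myStream/events?limit=10";

    HttpClient client = HttpClient.newHttpClient();
    HttpRequest request = HttpRequest.newBuilder(URI.create(url)).GET().build();
    HttpResponse<String> response = client.send(request, HttpResponse.BodyHandlers.ofString());

    // 200 with a JSON array of events, or 204 when nothing falls in the requested range.
    if (response.statusCode() == 200) {
      System.out.println(response.body());
    } else {
      System.out.println("No events (HTTP " + response.statusCode() + ")");
    }
  }
}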
use of co.cask.cdap.data.stream.StreamFileOffset in project cdap by caskdata.
the class StreamConsumerStateTestBase method testChangeInstance.
@Test
public void testChangeInstance() throws Exception {
  StreamAdmin streamAdmin = getStreamAdmin();
  String streamName = "testChangeInstance";
  StreamId streamId = TEST_NAMESPACE.stream(streamName);
  streamAdmin.create(streamId);
  StreamConfig config = streamAdmin.getConfig(streamId);

  // Creates a state with 4 offsets
  StreamConsumerState state = generateState(0L, 0, config, 0L, 4);
  StreamConsumerStateStore stateStore = createStateStore(config);

  // Save the state.
  stateStore.save(state);

  // Increase the number of instances
  streamAdmin.configureInstances(streamId, 0L, 2);

  // Get the state of the new instance; it should be the same as the existing one
  StreamConsumerState newState = stateStore.get(0L, 1);
  Assert.assertTrue(Iterables.elementsEqual(state.getState(), newState.getState()));

  // Change the state of instance 0 to a higher offset.
  List<StreamFileOffset> fileOffsets = Lists.newArrayList(state.getState());
  StreamFileOffset fileOffset = fileOffsets.get(0);
  long oldOffset = fileOffset.getOffset();
  long newOffset = oldOffset + 100000;
  fileOffsets.set(0, new StreamFileOffset(fileOffset, newOffset));
  state.setState(fileOffsets);
  stateStore.save(state);

  // Verify the change
  state = stateStore.get(0L, 0);
  Assert.assertEquals(newOffset, Iterables.get(state.getState(), 0).getOffset());

  // Increase the number of instances again
  streamAdmin.configureInstances(streamId, 0L, 3);

  // Verify that instance 0's offset is reset to the lowest one
  state = stateStore.get(0L, 0);
  Assert.assertEquals(oldOffset, Iterables.get(state.getState(), 0).getOffset());

  // Verify that no new file offset states are introduced (tests a bug in the configureInstances implementation)
  Assert.assertEquals(4, Iterables.size(state.getState()));

  // Verify that all offsets are the same
  List<StreamConsumerState> states = Lists.newArrayList();
  stateStore.getByGroup(0L, states);
  Assert.assertEquals(3, states.size());
  Assert.assertTrue(Iterables.elementsEqual(states.get(0).getState(), states.get(1).getState()));
  Assert.assertTrue(Iterables.elementsEqual(states.get(0).getState(), states.get(2).getState()));
}
use of co.cask.cdap.data.stream.StreamFileOffset in project cdap by caskdata.
the class StreamConsumerStateTestBase method generateState.
private StreamConsumerState generateState(long groupId, int instanceId, StreamConfig config, long partitionBaseTime, int numOffsets) throws IOException {
  List<StreamFileOffset> offsets = Lists.newArrayList();
  long partitionDuration = config.getPartitionDuration();
  for (int i = 0; i < numOffsets; i++) {
    Location partitionLocation = StreamUtils.createPartitionLocation(
      config.getLocation(), (partitionBaseTime + i) * partitionDuration, config.getPartitionDuration());
    offsets.add(new StreamFileOffset(StreamUtils.createStreamLocation(partitionLocation, "file", 0, StreamFileType.EVENT), i * 1000, 0));
  }
  return new StreamConsumerState(groupId, instanceId, offsets);
}
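For orientation, this helper places offset i into the partition starting at (partitionBaseTime + i) * partitionDuration and points it at byte offset i * 1000 of an event file in that partition. A tiny arithmetic sketch of that layout follows; the one-minute partition duration is an assumed value, purely for illustration.

public class PartitionLayoutSketch {
  public static void main(String[] args) {
    long partitionDuration = 60_000L;  // assumed 1-minute partitions, illustrative only
    long partitionBaseTime = 0L;
    int numOffsets = 4;
    for (int i = 0; i < numOffsets; i++) {
      long partitionStart = (partitionBaseTime + i) * partitionDuration;
      long byteOffset = i * 1000L;
      // e.g. "offset 2 -> partition starting at 120000 ms, byte offset 2000"
      System.out.println("offset " + i + " -> partition starting at " + partitionStart + " ms, byte offset " + byteOffset);
    }
  }
}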
use of co.cask.cdap.data.stream.StreamFileOffset in project cdap by caskdata.
the class AbstractStreamFileConsumerFactory method createReader.
private MultiLiveStreamFileReader createReader(final StreamConfig streamConfig, StreamConsumerState consumerState) throws IOException {
  Location streamLocation = streamConfig.getLocation();
  Preconditions.checkNotNull(streamLocation, "Stream location is null for %s", streamConfig.getStreamId());

  // Look for the latest stream generation
  final int generation = StreamUtils.getGeneration(streamConfig);
  streamLocation = StreamUtils.createGenerationLocation(streamLocation, generation);

  final long currentTime = System.currentTimeMillis();

  if (!Iterables.isEmpty(consumerState.getState())) {
    // See if any offset has a different generation or is expired. If so, don't use the old states.
    boolean useStoredStates = Iterables.all(consumerState.getState(), new Predicate<StreamFileOffset>() {

      @Override
      public boolean apply(StreamFileOffset input) {
        boolean isExpired = input.getPartitionEnd() < currentTime - streamConfig.getTTL();
        boolean sameGeneration = generation == input.getGeneration();
        return !isExpired && sameGeneration;
      }
    });

    if (useStoredStates) {
      LOG.info("Create file reader with consumer state: {}", consumerState);
      // Has existing offsets, just resume from there.
      MultiLiveStreamFileReader reader = new MultiLiveStreamFileReader(streamConfig, consumerState.getState());
      reader.initialize();
      return reader;
    }
  }

  // TODO: Support starting from some time rather than from the beginning.
  // Otherwise, search for files with the smallest partition start time.
  // If no partition exists for the stream, start with one partition earlier than the current time to make sure
  // no event will be lost if events start flowing in at about the same time.
  long startTime = StreamUtils.getPartitionStartTime(currentTime - streamConfig.getPartitionDuration(), streamConfig.getPartitionDuration());
  long earliestNonExpiredTime = StreamUtils.getPartitionStartTime(currentTime - streamConfig.getTTL(), streamConfig.getPartitionDuration());

  for (Location partitionLocation : streamLocation.list()) {
    if (!partitionLocation.isDirectory() || !StreamUtils.isPartition(partitionLocation.getName())) {
      // A partition should be a directory
      continue;
    }
    long partitionStartTime = StreamUtils.getPartitionStartTime(partitionLocation.getName());
    boolean isPartitionExpired = partitionStartTime < earliestNonExpiredTime;
    if (!isPartitionExpired && partitionStartTime < startTime) {
      startTime = partitionStartTime;
    }
  }

  // Create file offsets
  // TODO: Be able to support dynamic naming of stream writer instances.
  // Maybe it's done through MultiLiveStreamHandler to alter the list of file offsets dynamically
  Location partitionLocation = StreamUtils.createPartitionLocation(streamLocation, startTime, streamConfig.getPartitionDuration());
  List<StreamFileOffset> fileOffsets = Lists.newArrayList();
  getFileOffsets(partitionLocation, fileOffsets, generation);

  LOG.info("Empty consumer state. Create file reader with file offsets: groupId={}, instanceId={} states={}",
           consumerState.getGroupId(), consumerState.getInstanceId(), fileOffsets);

  MultiLiveStreamFileReader reader = new MultiLiveStreamFileReader(streamConfig, fileOffsets);
  reader.initialize();
  return reader;
}
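When no usable stored state exists, the fallback path scans the partition directories and picks the earliest partition start time that is still within the TTL, defaulting to one partition before the current time. Below is a simplified standalone sketch of that selection; partition boundaries are computed with plain integer division in place of StreamUtils.getPartitionStartTime, and all values are assumed for illustration.

import java.util.List;

public class SmallestPartitionSketch {

  // Pick the earliest non-expired partition start time, defaulting to one partition before "now",
  // mirroring the fallback path above. Times are plain epoch millis.
  static long selectStartTime(List<Long> partitionStartTimes, long now, long partitionDuration, long ttl) {
    long startTime = ((now - partitionDuration) / partitionDuration) * partitionDuration;
    long earliestNonExpired = ((now - ttl) / partitionDuration) * partitionDuration;
    for (long partitionStart : partitionStartTimes) {
      boolean expired = partitionStart < earliestNonExpired;
      if (!expired && partitionStart < startTime) {
        startTime = partitionStart;
      }
    }
    return startTime;
  }

  public static void main(String[] args) {
    long partitionDuration = 3_600_000L;   // 1-hour partitions (assumed)
    long ttl = 24 * 3_600_000L;            // 1-day TTL (assumed)
    long now = 100 * 3_600_000L;
    // One expired partition and two live ones: the earliest live partition wins.
    List<Long> partitions = List.of(10 * 3_600_000L, 80 * 3_600_000L, 95 * 3_600_000L);
    System.out.println(selectStartTime(partitions, now, partitionDuration, ttl));  // prints 288000000 (the 80-hour partition)
  }
}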