Example 1 with StreamFileOffset

use of co.cask.cdap.data.stream.StreamFileOffset in project cdap by caskdata.

the class FileStreamAdmin method mutateStates.

private void mutateStates(long groupId, int instances, Set<StreamConsumerState> states, Set<StreamConsumerState> newStates, Set<StreamConsumerState> removeStates) {
    int oldInstances = states.size();
    if (oldInstances == instances) {
        // If the number of instances hasn't changed, there is no need to mutate any states
        return;
    }
    // Collects smallest offsets across all existing consumers
    // Map from event file location to file offset.
    // Use tree map to maintain ordering consistency in the offsets.
    // Not required by any logic, just easier to look at when logged.
    Map<Location, StreamFileOffset> fileOffsets = Maps.newTreeMap(Locations.LOCATION_COMPARATOR);
    for (StreamConsumerState state : states) {
        for (StreamFileOffset fileOffset : state.getState()) {
            StreamFileOffset smallestOffset = fileOffsets.get(fileOffset.getEventLocation());
            if (smallestOffset == null || fileOffset.getOffset() < smallestOffset.getOffset()) {
                fileOffsets.put(fileOffset.getEventLocation(), new StreamFileOffset(fileOffset));
            }
        }
    }
    // Constructs smallest offsets
    Collection<StreamFileOffset> smallestOffsets = fileOffsets.values();
    // When the group size changes, reset all existing instances' states to the smallest file offsets constructed above.
    for (StreamConsumerState state : states) {
        if (state.getInstanceId() < instances) {
            // Only keep valid instances
            newStates.add(new StreamConsumerState(groupId, state.getInstanceId(), smallestOffsets));
        } else {
            removeStates.add(state);
        }
    }
    // For all new instances, set file offsets to the smallest ones constructed above.
    for (int i = oldInstances; i < instances; i++) {
        newStates.add(new StreamConsumerState(groupId, i, smallestOffsets));
    }
}
Also used : StreamFileOffset(co.cask.cdap.data.stream.StreamFileOffset) Location(org.apache.twill.filesystem.Location)
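
The heart of mutateStates is the merge that keeps, per event file, the smallest offset seen across all existing consumers, so no instance starts past data another instance still needs. A minimal, self-contained sketch of that merge under assumed types (SimpleOffset and mergeSmallest below are hypothetical stand-ins, not CDAP APIs):

import java.util.List;
import java.util.Map;
import java.util.TreeMap;

public final class SmallestOffsetMerge {

    // Hypothetical stand-in for StreamFileOffset: an event file name plus a byte offset.
    static final class SimpleOffset {
        final String file;
        final long offset;

        SimpleOffset(String file, long offset) {
            this.file = file;
            this.offset = offset;
        }
    }

    // Keeps, per file, the smallest offset seen across all consumer states.
    static Map<String, SimpleOffset> mergeSmallest(List<List<SimpleOffset>> allStates) {
        // Tree map only for stable iteration order when logged, mirroring the comment above.
        Map<String, SimpleOffset> smallest = new TreeMap<>();
        for (List<SimpleOffset> state : allStates) {
            for (SimpleOffset candidate : state) {
                SimpleOffset current = smallest.get(candidate.file);
                if (current == null || candidate.offset < current.offset) {
                    smallest.put(candidate.file, candidate);
                }
            }
        }
        return smallest;
    }
}

As in the original, the tree map is used only so the offsets iterate in a stable order when logged; a hash map would be functionally identical.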

Example 2 with StreamFileOffset

use of co.cask.cdap.data.stream.StreamFileOffset in project cdap by caskdata.

the class StreamFetchHandler method fetch.

/**
 * Handler for the HTTP API {@code /streams/[stream_name]/events?start=[start_ts]&end=[end_ts]&limit=[event_limit]}
 * <p>
 * Responds with:
 * <ul>
 * <li>404 if stream does not exist</li>
 * <li>204 if no event in the given start/end time range exists</li>
 * <li>200 if there are one or more events</li>
 * </ul>
 * </p>
 * <p>
 * Response body is a JSON array of StreamEvent objects.
 * </p>
 *
 * @see StreamEventTypeAdapter StreamEventTypeAdapter for the format of the StreamEvent object
 */
@GET
@Path("/{stream}/events")
public void fetch(HttpRequest request, final HttpResponder responder, @PathParam("namespace-id") String namespaceId, @PathParam("stream") String stream, @QueryParam("start") @DefaultValue("0") String start, @QueryParam("end") @DefaultValue("9223372036854775807") String end, @QueryParam("limit") @DefaultValue("2147483647") final int limitEvents) throws Exception {
    long startTime = TimeMathParser.parseTime(start, TimeUnit.MILLISECONDS);
    long endTime = TimeMathParser.parseTime(end, TimeUnit.MILLISECONDS);
    StreamId streamId = new StreamId(namespaceId, stream);
    if (!verifyGetEventsRequest(streamId, startTime, endTime, limitEvents, responder)) {
        return;
    }
    // Make sure the user has READ permission on the stream, since getConfig doesn't perform that check.
    authorizationEnforcer.enforce(streamId, authenticationContext.getPrincipal(), Action.READ);
    final StreamConfig streamConfig = streamAdmin.getConfig(streamId);
    long now = System.currentTimeMillis();
    startTime = Math.max(startTime, now - streamConfig.getTTL());
    endTime = Math.min(endTime, now);
    final long streamStartTime = startTime;
    final long streamEndTime = endTime;
    impersonator.doAs(streamId, new Callable<Void>() {

        @Override
        public Void call() throws Exception {
            int limit = limitEvents;
            // Create the stream event reader
            try (FileReader<StreamEventOffset, Iterable<StreamFileOffset>> reader = createReader(streamConfig, streamStartTime)) {
                TimeRangeReadFilter readFilter = new TimeRangeReadFilter(streamStartTime, streamEndTime);
                List<StreamEvent> events = Lists.newArrayListWithCapacity(100);
                // Reads the first batch of events from the stream.
                int eventsRead = readEvents(reader, events, limit, readFilter);
                // If no events were read, return 204 No Content
                if (eventsRead <= 0) {
                    responder.sendStatus(HttpResponseStatus.NO_CONTENT);
                    return null;
                }
                // Send a chunked response, since we don't want to buffer all events in memory to determine the content length.
                ChunkResponder chunkResponder = responder.sendChunkStart(HttpResponseStatus.OK, new DefaultHttpHeaders().set(HttpHeaderNames.CONTENT_TYPE, "application/json; charset=utf-8"));
                ByteBuf buffer = Unpooled.buffer();
                JsonWriter jsonWriter = new JsonWriter(new OutputStreamWriter(new ByteBufOutputStream(buffer), StandardCharsets.UTF_8));
                // Response is an array of stream events
                jsonWriter.beginArray();
                while (limit > 0 && eventsRead > 0) {
                    limit -= eventsRead;
                    for (StreamEvent event : events) {
                        GSON.toJson(event, StreamEvent.class, jsonWriter);
                        jsonWriter.flush();
                        // If the chunk size limit is exceeded, send a new chunk.
                        if (buffer.readableBytes() >= CHUNK_SIZE) {
                        // If the connection is closed, sendChunk will throw an IOException.
                        // No need to handle the exception, as it will just be propagated back to the
                        // netty-http library, which will handle it.
                        // Need to copy the buffer because it will be reused, and sendChunk is an async operation.
                            chunkResponder.sendChunk(buffer.copy());
                            buffer.clear();
                        }
                    }
                    events.clear();
                    if (limit > 0) {
                        eventsRead = readEvents(reader, events, limit, readFilter);
                    }
                }
                jsonWriter.endArray();
                jsonWriter.close();
                // Send the last chunk that still has data
                if (buffer.isReadable()) {
                    // No need to copy the last chunk, since the buffer will not be reused
                    chunkResponder.sendChunk(buffer);
                }
                Closeables.closeQuietly(chunkResponder);
            }
            return null;
        }
    });
}
Also used : StreamId(co.cask.cdap.proto.id.StreamId) ByteBufOutputStream(io.netty.buffer.ByteBufOutputStream) StreamEvent(co.cask.cdap.api.flow.flowlet.StreamEvent) StreamConfig(co.cask.cdap.data2.transaction.stream.StreamConfig) ByteBuf(io.netty.buffer.ByteBuf) JsonWriter(com.google.gson.stream.JsonWriter) IOException(java.io.IOException) DefaultHttpHeaders(io.netty.handler.codec.http.DefaultHttpHeaders) FileReader(co.cask.cdap.data.file.FileReader) MultiLiveStreamFileReader(co.cask.cdap.data.stream.MultiLiveStreamFileReader) List(java.util.List) OutputStreamWriter(java.io.OutputStreamWriter) TimeRangeReadFilter(co.cask.cdap.data.stream.TimeRangeReadFilter) StreamFileOffset(co.cask.cdap.data.stream.StreamFileOffset) ChunkResponder(co.cask.http.ChunkResponder) Path(javax.ws.rs.Path) GET(javax.ws.rs.GET)
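
The part of fetch worth isolating is the streaming pattern: serialize each event into a reusable buffer and flush a chunk whenever the buffer crosses a size threshold, so the full response never sits in memory. A simplified sketch of the same flush-when-full idea using only Gson and a byte array buffer (CHUNK_SIZE and sendChunk here are hypothetical placeholders for the netty-http ChunkResponder machinery):

import com.google.gson.stream.JsonWriter;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.nio.charset.StandardCharsets;
import java.util.List;

public final class ChunkedJsonSketch {

    // Hypothetical chunk threshold; the handler above uses its own CHUNK_SIZE constant.
    private static final int CHUNK_SIZE = 8192;

    // Writes each event as a JSON array element, flushing a chunk whenever the
    // buffer grows past CHUNK_SIZE. sendChunk() stands in for ChunkResponder.sendChunk().
    static void writeChunked(List<String> events) throws IOException {
        ByteArrayOutputStream buffer = new ByteArrayOutputStream();
        JsonWriter jsonWriter = new JsonWriter(new OutputStreamWriter(buffer, StandardCharsets.UTF_8));
        jsonWriter.beginArray();
        for (String event : events) {
            jsonWriter.value(event);
            // Flush the writer so the bytes land in the buffer before we measure it.
            jsonWriter.flush();
            if (buffer.size() >= CHUNK_SIZE) {
                sendChunk(buffer.toByteArray());
                buffer.reset();
            }
        }
        jsonWriter.endArray();
        jsonWriter.close();
        // Send the last chunk if any data remains.
        if (buffer.size() > 0) {
            sendChunk(buffer.toByteArray());
        }
    }

    private static void sendChunk(byte[] chunk) {
        // Placeholder: a real handler would hand this to the HTTP responder.
        System.out.println("chunk of " + chunk.length + " bytes");
    }
}

The copy-versus-reuse distinction in the handler above disappears here because toByteArray already returns a fresh copy; with a pooled ByteBuf and an asynchronous sendChunk, the explicit buffer.copy() is what keeps in-flight chunks from being overwritten.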

Example 3 with StreamFileOffset

use of co.cask.cdap.data.stream.StreamFileOffset in project cdap by caskdata.

the class StreamConsumerStateTestBase method testChangeInstance.

@Test
public void testChangeInstance() throws Exception {
    StreamAdmin streamAdmin = getStreamAdmin();
    String streamName = "testChangeInstance";
    StreamId streamId = TEST_NAMESPACE.stream(streamName);
    streamAdmin.create(streamId);
    StreamConfig config = streamAdmin.getConfig(streamId);
    // Creates a state with 4 offsets
    StreamConsumerState state = generateState(0L, 0, config, 0L, 4);
    StreamConsumerStateStore stateStore = createStateStore(config);
    // Save the state.
    stateStore.save(state);
    // Increase the number of instances
    streamAdmin.configureInstances(streamId, 0L, 2);
    // Get the state of the new instance; it should be the same as the existing one
    StreamConsumerState newState = stateStore.get(0L, 1);
    Assert.assertTrue(Iterables.elementsEqual(state.getState(), newState.getState()));
    // Change the state of instance 0 to higher offset.
    List<StreamFileOffset> fileOffsets = Lists.newArrayList(state.getState());
    StreamFileOffset fileOffset = fileOffsets.get(0);
    long oldOffset = fileOffset.getOffset();
    long newOffset = oldOffset + 100000;
    fileOffsets.set(0, new StreamFileOffset(fileOffset, newOffset));
    state.setState(fileOffsets);
    stateStore.save(state);
    // Verify the change
    state = stateStore.get(0L, 0);
    Assert.assertEquals(newOffset, Iterables.get(state.getState(), 0).getOffset());
    // Increase the number of instances again
    streamAdmin.configureInstances(streamId, 0L, 3);
    // Verify that instance 0's offset gets reset to the lowest
    state = stateStore.get(0L, 0);
    Assert.assertEquals(oldOffset, Iterables.get(state.getState(), 0).getOffset());
    // Verify that no new file offset states are introduced (tests a bug in the configureInstances implementation)
    Assert.assertEquals(4, Iterables.size(state.getState()));
    // Verify that all offsets are the same
    List<StreamConsumerState> states = Lists.newArrayList();
    stateStore.getByGroup(0L, states);
    Assert.assertEquals(3, states.size());
    Assert.assertTrue(Iterables.elementsEqual(states.get(0).getState(), states.get(1).getState()));
    Assert.assertTrue(Iterables.elementsEqual(states.get(0).getState(), states.get(2).getState()));
}
Also used : StreamId(co.cask.cdap.proto.id.StreamId) StreamFileOffset(co.cask.cdap.data.stream.StreamFileOffset) Test(org.junit.Test)
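
The test leans on StreamFileOffset behaving as an immutable value: advancing a consumer means replacing an offset with a copy that carries a new position, via new StreamFileOffset(fileOffset, newOffset). A minimal sketch of that copy-with pattern on a hypothetical ImmutableOffset class (not a CDAP type):

public final class ImmutableOffset {

    private final String file;
    private final long offset;

    public ImmutableOffset(String file, long offset) {
        this.file = file;
        this.offset = offset;
    }

    // Copy constructor with a new position, mirroring new StreamFileOffset(fileOffset, newOffset).
    public ImmutableOffset(ImmutableOffset other, long newOffset) {
        this(other.file, newOffset);
    }

    public long getOffset() {
        return offset;
    }
}

Advancing a consumer then looks like fileOffsets.set(0, new ImmutableOffset(fileOffsets.get(0), newPosition)), the same shape the test uses.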

Example 4 with StreamFileOffset

use of co.cask.cdap.data.stream.StreamFileOffset in project cdap by caskdata.

the class StreamConsumerStateTestBase method generateState.

private StreamConsumerState generateState(long groupId, int instanceId, StreamConfig config, long partitionBaseTime, int numOffsets) throws IOException {
    List<StreamFileOffset> offsets = Lists.newArrayList();
    long partitionDuration = config.getPartitionDuration();
    for (int i = 0; i < numOffsets; i++) {
        Location partitionLocation = StreamUtils.createPartitionLocation(config.getLocation(), (partitionBaseTime + i) * partitionDuration, config.getPartitionDuration());
        offsets.add(new StreamFileOffset(StreamUtils.createStreamLocation(partitionLocation, "file", 0, StreamFileType.EVENT), i * 1000, 0));
    }
    return new StreamConsumerState(groupId, instanceId, offsets);
}
Also used : StreamFileOffset(co.cask.cdap.data.stream.StreamFileOffset) Location(org.apache.twill.filesystem.Location)
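
generateState places each offset in a partition directory derived from an aligned start time, computed above as (partitionBaseTime + i) * partitionDuration. Assuming StreamUtils.getPartitionStartTime follows the usual floor-to-duration alignment (an assumption; only the multiplication above is from the source), the arithmetic looks like:

public final class PartitionTimeSketch {

    // Hypothetical floor-to-duration alignment: maps any timestamp to the start
    // of the partition that contains it.
    static long partitionStartTime(long timestampMs, long partitionDurationMs) {
        return timestampMs - (timestampMs % partitionDurationMs);
    }

    public static void main(String[] args) {
        long duration = 3_600_000L;  // one-hour partitions
        // Any timestamp inside the same hour maps to the same partition start.
        System.out.println(partitionStartTime(3_700_000L, duration));  // 3600000
        System.out.println(partitionStartTime(7_100_000L, duration));  // 3600000
    }
}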

Example 5 with StreamFileOffset

use of co.cask.cdap.data.stream.StreamFileOffset in project cdap by caskdata.

the class AbstractStreamFileConsumerFactory method createReader.

private MultiLiveStreamFileReader createReader(final StreamConfig streamConfig, StreamConsumerState consumerState) throws IOException {
    Location streamLocation = streamConfig.getLocation();
    Preconditions.checkNotNull(streamLocation, "Stream location is null for %s", streamConfig.getStreamId());
    // Look for the latest stream generation
    final int generation = StreamUtils.getGeneration(streamConfig);
    streamLocation = StreamUtils.createGenerationLocation(streamLocation, generation);
    final long currentTime = System.currentTimeMillis();
    if (!Iterables.isEmpty(consumerState.getState())) {
        // See if any offset has a different generation or is expired. If so, don't use the old states.
        boolean useStoredStates = Iterables.all(consumerState.getState(), new Predicate<StreamFileOffset>() {

            @Override
            public boolean apply(StreamFileOffset input) {
                boolean isExpired = input.getPartitionEnd() < currentTime - streamConfig.getTTL();
                boolean sameGeneration = generation == input.getGeneration();
                return !isExpired && sameGeneration;
            }
        });
        if (useStoredStates) {
            LOG.info("Create file reader with consumer state: {}", consumerState);
            // Has existing offsets, just resume from there.
            MultiLiveStreamFileReader reader = new MultiLiveStreamFileReader(streamConfig, consumerState.getState());
            reader.initialize();
            return reader;
        }
    }
    // TODO: Support starting from some time rather than from the beginning.
    // Otherwise, search for files with the smallest partition start time.
    // If no partition exists for the stream, start one partition earlier than the current time to make sure
    // no events are lost if events start flowing in at about the same time.
    long startTime = StreamUtils.getPartitionStartTime(currentTime - streamConfig.getPartitionDuration(), streamConfig.getPartitionDuration());
    long earliestNonExpiredTime = StreamUtils.getPartitionStartTime(currentTime - streamConfig.getTTL(), streamConfig.getPartitionDuration());
    for (Location partitionLocation : streamLocation.list()) {
        if (!partitionLocation.isDirectory() || !StreamUtils.isPartition(partitionLocation.getName())) {
            // Partition should be a directory
            continue;
        }
        long partitionStartTime = StreamUtils.getPartitionStartTime(partitionLocation.getName());
        boolean isPartitionExpired = partitionStartTime < earliestNonExpiredTime;
        if (!isPartitionExpired && partitionStartTime < startTime) {
            startTime = partitionStartTime;
        }
    }
    // Create file offsets
    // TODO: Be able to support dynamic names of stream writer instances.
    // Maybe it's done through MultiLiveStreamHandler, altering the list of file offsets dynamically.
    Location partitionLocation = StreamUtils.createPartitionLocation(streamLocation, startTime, streamConfig.getPartitionDuration());
    List<StreamFileOffset> fileOffsets = Lists.newArrayList();
    getFileOffsets(partitionLocation, fileOffsets, generation);
    LOG.info("Empty consumer state. Create file reader with file offsets: groupId={}, instanceId={} states={}", consumerState.getGroupId(), consumerState.getInstanceId(), fileOffsets);
    MultiLiveStreamFileReader reader = new MultiLiveStreamFileReader(streamConfig, fileOffsets);
    reader.initialize();
    return reader;
}
Also used : MultiLiveStreamFileReader(co.cask.cdap.data.stream.MultiLiveStreamFileReader) StreamFileOffset(co.cask.cdap.data.stream.StreamFileOffset) Location(org.apache.twill.filesystem.Location)
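
The reuse decision in createReader reduces to a per-offset validity test: stored state is kept only if every offset is unexpired and belongs to the current generation. Here is the same check written with java.util.stream instead of a Guava Predicate, against a hypothetical OffsetView stand-in for the two StreamFileOffset getters used above:

import java.util.List;

public final class OffsetValiditySketch {

    // Hypothetical view of the two StreamFileOffset properties the check needs.
    interface OffsetView {
        long getPartitionEnd();
        int getGeneration();
    }

    // Mirrors the Predicate in createReader: reuse stored state only if every
    // offset is unexpired and from the current generation.
    static boolean canUseStoredStates(List<OffsetView> offsets, long currentTime, long ttl, int generation) {
        return offsets.stream().allMatch(o -> {
            boolean expired = o.getPartitionEnd() < currentTime - ttl;
            boolean sameGeneration = o.getGeneration() == generation;
            return !expired && sameGeneration;
        });
    }
}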

Aggregations

StreamFileOffset (co.cask.cdap.data.stream.StreamFileOffset) 7
Location (org.apache.twill.filesystem.Location) 5
MultiLiveStreamFileReader (co.cask.cdap.data.stream.MultiLiveStreamFileReader) 3
StreamId (co.cask.cdap.proto.id.StreamId) 2
StreamEvent (co.cask.cdap.api.flow.flowlet.StreamEvent) 1
FileReader (co.cask.cdap.data.file.FileReader) 1
TimeRangeReadFilter (co.cask.cdap.data.stream.TimeRangeReadFilter) 1
StreamConfig (co.cask.cdap.data2.transaction.stream.StreamConfig) 1
ChunkResponder (co.cask.http.ChunkResponder) 1
JsonWriter (com.google.gson.stream.JsonWriter) 1
ByteBuf (io.netty.buffer.ByteBuf) 1
ByteBufOutputStream (io.netty.buffer.ByteBufOutputStream) 1
DefaultHttpHeaders (io.netty.handler.codec.http.DefaultHttpHeaders) 1
IOException (java.io.IOException) 1
OutputStreamWriter (java.io.OutputStreamWriter) 1
List (java.util.List) 1
GET (javax.ws.rs.GET) 1
Path (javax.ws.rs.Path) 1
Test (org.junit.Test) 1