Example 31 with StreamEvent

Use of co.cask.cdap.api.flow.flowlet.StreamEvent in project cdap by caskdata.

From the class StreamConsumerTestBase, the method testNamespacedStreamConsumers:

@Test
public void testNamespacedStreamConsumers() throws Exception {
    // Test two consumers for two streams with the same name but in different namespaces. Their consumption
    // should be independent of each other.
    String stream = "testNamespacedStreamConsumers";
    StreamId streamId = TEST_NAMESPACE.stream(stream);
    StreamId otherStreamId = OTHER_NAMESPACE.stream(stream);
    StreamAdmin streamAdmin = getStreamAdmin();
    streamAdmin.create(streamId);
    streamAdmin.create(otherStreamId);
    StreamConfig streamConfig = streamAdmin.getConfig(streamId);
    StreamConfig otherStreamConfig = streamAdmin.getConfig(otherStreamId);
    // Writes 5 events to both streams
    writeEvents(streamConfig, "Testing ", 5);
    writeEvents(otherStreamConfig, "Testing ", 5);
    streamAdmin.configureInstances(streamId, 0L, 1);
    streamAdmin.configureInstances(otherStreamId, 0L, 1);
    StreamConsumerFactory consumerFactory = getConsumerFactory();
    StreamConsumer consumer = consumerFactory.create(streamId, "fifo.rollback", new ConsumerConfig(0L, 0, 1, DequeueStrategy.FIFO, null));
    StreamConsumer otherConsumer = consumerFactory.create(otherStreamId, "fifo.rollback", new ConsumerConfig(0L, 0, 1, DequeueStrategy.FIFO, null));
    // Try to dequeue using both consumers
    TransactionContext context = createTxContext(consumer);
    TransactionContext otherContext = createTxContext(otherConsumer);
    context.start();
    otherContext.start();
    // Consume events from the stream in the test namespace
    DequeueResult<StreamEvent> result0 = consumer.poll(1, 1, TimeUnit.SECONDS);
    Assert.assertEquals("Testing 0", Charsets.UTF_8.decode(result0.iterator().next().getBody()).toString());
    context.finish();
    context.start();
    result0 = consumer.poll(1, 1, TimeUnit.SECONDS);
    Assert.assertEquals("Testing 1", Charsets.UTF_8.decode(result0.iterator().next().getBody()).toString());
    context.finish();
    context.start();
    result0 = consumer.poll(1, 1, TimeUnit.SECONDS);
    Assert.assertEquals("Testing 2", Charsets.UTF_8.decode(result0.iterator().next().getBody()).toString());
    context.finish();
    context.start();
    // Even though the consumer of the same-named stream has already consumed 3 events, otherConsumer reads a
    // stream in a different namespace, so it will still be on the initial event.
    DequeueResult<StreamEvent> result1 = otherConsumer.poll(1, 1, TimeUnit.SECONDS);
    Assert.assertEquals("Testing 0", Charsets.UTF_8.decode(result1.iterator().next().getBody()).toString());
    otherContext.finish();
    otherContext.start();
    result0 = consumer.poll(1, 1, TimeUnit.SECONDS);
    result1 = otherConsumer.poll(1, 1, TimeUnit.SECONDS);
    Assert.assertEquals("Testing 3", Charsets.UTF_8.decode(result0.iterator().next().getBody()).toString());
    Assert.assertEquals("Testing 1", Charsets.UTF_8.decode(result1.iterator().next().getBody()).toString());
    // Commit both
    context.finish();
    otherContext.finish();
    consumer.close();
    otherConsumer.close();
}
Also used: StreamId (co.cask.cdap.proto.id.StreamId), TransactionContext (org.apache.tephra.TransactionContext), StreamEvent (co.cask.cdap.api.flow.flowlet.StreamEvent), ConsumerConfig (co.cask.cdap.data2.queue.ConsumerConfig), Test (org.junit.Test)
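
The createTxContext helper is defined elsewhere in the test base. A minimal sketch of what it plausibly looks like, assuming the base class holds a Tephra TransactionSystemClient in a field (txClient is a hypothetical name) and relying on StreamConsumer being a TransactionAware, which its use with TransactionContext above implies:

import org.apache.tephra.TransactionSystemClient;

// Sketch only, not the actual helper from StreamConsumerTestBase.
// 'txClient' is an assumed field; each StreamConsumer joins the
// transaction as a TransactionAware.
private TransactionContext createTxContext(StreamConsumer... consumers) {
    return new TransactionContext(txClient, consumers);
}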

Example 32 with StreamEvent

Use of co.cask.cdap.api.flow.flowlet.StreamEvent in project cdap by caskdata.

From the class StreamConsumerTestBase, the method testFIFORollback:

@Test
public void testFIFORollback() throws Exception {
    String stream = "testFIFORollback";
    StreamId streamId = TEST_NAMESPACE.stream(stream);
    StreamAdmin streamAdmin = getStreamAdmin();
    streamAdmin.create(streamId);
    StreamConfig streamConfig = streamAdmin.getConfig(streamId);
    // Writes 5 events
    writeEvents(streamConfig, "Testing ", 5);
    streamAdmin.configureInstances(streamId, 0L, 2);
    StreamConsumerFactory consumerFactory = getConsumerFactory();
    StreamConsumer consumer0 = consumerFactory.create(streamId, "fifo.rollback", new ConsumerConfig(0L, 0, 2, DequeueStrategy.FIFO, null));
    StreamConsumer consumer1 = consumerFactory.create(streamId, "fifo.rollback", new ConsumerConfig(0L, 1, 2, DequeueStrategy.FIFO, null));
    // Try to dequeue using both consumers
    TransactionContext context0 = createTxContext(consumer0);
    TransactionContext context1 = createTxContext(consumer1);
    context0.start();
    context1.start();
    DequeueResult<StreamEvent> result0 = consumer0.poll(1, 1, TimeUnit.SECONDS);
    DequeueResult<StreamEvent> result1 = consumer1.poll(1, 1, TimeUnit.SECONDS);
    Assert.assertEquals("Testing 0", Charsets.UTF_8.decode(result0.iterator().next().getBody()).toString());
    Assert.assertEquals("Testing 1", Charsets.UTF_8.decode(result1.iterator().next().getBody()).toString());
    // Commit the first one, rollback the second one.
    context0.finish();
    context1.abort();
    // Dequeue again with the consumers
    context0.start();
    context1.start();
    result0 = consumer0.poll(1, 1, TimeUnit.SECONDS);
    result1 = consumer1.poll(1, 1, TimeUnit.SECONDS);
    // Expect consumer 0 to keep proceeding, while consumer 1 retries with what it claimed in the previous
    // transaction. This is the FIFO-mode optimization that avoids going back and rescanning.
    Assert.assertEquals("Testing 2", Charsets.UTF_8.decode(result0.iterator().next().getBody()).toString());
    Assert.assertEquals("Testing 1", Charsets.UTF_8.decode(result1.iterator().next().getBody()).toString());
    // Commit both
    context0.finish();
    context1.finish();
    consumer0.close();
    consumer1.close();
}
Also used: StreamId (co.cask.cdap.proto.id.StreamId), TransactionContext (org.apache.tephra.TransactionContext), StreamEvent (co.cask.cdap.api.flow.flowlet.StreamEvent), ConsumerConfig (co.cask.cdap.data2.queue.ConsumerConfig), Test (org.junit.Test)
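
The finish/abort calls above follow Tephra's standard transaction lifecycle. Distilled into a generic sketch (the processing step is a placeholder, and the enclosing method is assumed to declare throws Exception):

// Generic transactional-dequeue pattern, as exercised by the test above.
TransactionContext ctx = createTxContext(consumer);
ctx.start();
try {
    DequeueResult<StreamEvent> result = consumer.poll(1, 1, TimeUnit.SECONDS);
    // ... process the dequeued events ...
    ctx.finish();  // commit: the claimed events are marked consumed
} catch (Exception e) {
    ctx.abort();   // rollback: a FIFO consumer re-claims the same events next time
    throw e;
}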

Example 33 with StreamEvent

Use of co.cask.cdap.api.flow.flowlet.StreamEvent in project cdap by caskdata.

From the class StreamConsumerTestBase, the method testTTLStartingFile:

@Category(SlowTests.class)
@Test
public void testTTLStartingFile() throws Exception {
    String stream = "testTTLStartingFile";
    StreamId streamId = TEST_NAMESPACE.stream(stream);
    StreamAdmin streamAdmin = getStreamAdmin();
    // Create the stream with a TTL of 3 seconds and a partition duration of 3 seconds
    final long ttl = TimeUnit.SECONDS.toMillis(3);
    Properties streamProperties = new Properties();
    streamProperties.setProperty(Constants.Stream.TTL, Long.toString(ttl));
    streamProperties.setProperty(Constants.Stream.PARTITION_DURATION, Long.toString(ttl));
    streamAdmin.create(streamId, streamProperties);
    StreamConfig streamConfig = streamAdmin.getConfig(streamId);
    streamAdmin.configureGroups(streamId, ImmutableMap.of(0L, 1, 1L, 1));
    StreamConsumerFactory consumerFactory = getConsumerFactory();
    StreamConsumer consumer = consumerFactory.create(streamId, stream, new ConsumerConfig(0L, 0, 1, DequeueStrategy.FIFO, null));
    StreamConsumer newConsumer;
    Set<StreamEvent> expectedEvents = Sets.newTreeSet(STREAM_EVENT_COMPARATOR);
    try {
        // Create a new consumer for second-consumer verification.
        // The consumer must be created before writing events because in HBase, consumer creation takes a couple of seconds.
        newConsumer = consumerFactory.create(streamId, stream, new ConsumerConfig(1L, 0, 1, DequeueStrategy.FIFO, null));
        // Write 20 events into a partition that will expire after sleeping for the TTL
        writeEvents(streamConfig, "Phase 0 expired event ", 20);
        Thread.sleep(ttl);
        verifyEvents(consumer, expectedEvents);
        // also verify for a new consumer
        try {
            verifyEvents(newConsumer, expectedEvents);
        } finally {
            newConsumer.close();
        }
        // Create a new consumer for second-consumer verification (with clean state).
        // The consumer must be created before writing events because in HBase, consumer creation takes a couple of seconds.
        streamAdmin.configureGroups(streamId, ImmutableMap.of(0L, 1));
        streamAdmin.configureGroups(streamId, ImmutableMap.of(0L, 1, 1L, 1));
        newConsumer = consumerFactory.create(streamId, stream, new ConsumerConfig(1L, 0, 1, DequeueStrategy.FIFO, null));
        // Write 20 events into a partition and read them back immediately. They shouldn't expire.
        expectedEvents.addAll(writeEvents(streamConfig, "Phase 1 non-expired event ", 20));
        verifyEvents(consumer, expectedEvents);
        // also verify for a new consumer
        try {
            verifyEvents(newConsumer, expectedEvents);
        } finally {
            newConsumer.close();
        }
        // Create a new consumer for second-consumer verification (with clean state).
        // The consumer must be created before writing events because in HBase, consumer creation takes a couple of seconds.
        streamAdmin.configureGroups(streamId, ImmutableMap.of(0L, 1));
        streamAdmin.configureGroups(streamId, ImmutableMap.of(0L, 1, 1L, 1));
        newConsumer = consumerFactory.create(streamId, stream, new ConsumerConfig(1L, 0, 1, DequeueStrategy.FIFO, null));
        // Write 20 events into a partition that will expire after sleeping for the TTL.
        // This writes to a new partition, different from the one used by the first batch.
        // Also, because it sleeps for the TTL, the previous batch will expire as well.
        expectedEvents.clear();
        writeEvents(streamConfig, "Phase 2 expired event ", 20);
        Thread.sleep(ttl);
        verifyEvents(consumer, expectedEvents);
        // also verify for a new consumer
        try {
            verifyEvents(newConsumer, expectedEvents);
        } finally {
            newConsumer.close();
        }
        // Create a new consumer for second-consumer verification (with clean state).
        // The consumer must be created before writing events because in HBase, consumer creation takes a couple of seconds.
        streamAdmin.configureGroups(streamId, ImmutableMap.of(0L, 1));
        streamAdmin.configureGroups(streamId, ImmutableMap.of(0L, 1, 1L, 1));
        newConsumer = consumerFactory.create(streamId, stream, new ConsumerConfig(1L, 0, 1, DequeueStrategy.FIFO, null));
        // Write 20 events into a partition and read them back immediately. They shouldn't expire.
        expectedEvents.addAll(writeEvents(streamConfig, "Phase 3 non-expired event ", 20));
        verifyEvents(consumer, expectedEvents);
        // also verify for a new consumer
        try {
            verifyEvents(newConsumer, expectedEvents);
        } finally {
            newConsumer.close();
        }
        // Should be no more pending events
        expectedEvents.clear();
        verifyEvents(consumer, expectedEvents);
    } finally {
        consumer.close();
    }
}
Also used: StreamId (co.cask.cdap.proto.id.StreamId), StreamEvent (co.cask.cdap.api.flow.flowlet.StreamEvent), ConsumerConfig (co.cask.cdap.data2.queue.ConsumerConfig), Properties (java.util.Properties), Category (org.junit.experimental.categories.Category), Test (org.junit.Test)
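
STREAM_EVENT_COMPARATOR is defined elsewhere in the test base; the TreeSet above only needs a total order over events. A hedged sketch of a comparator that would serve, ordering by timestamp and breaking ties by body (this ordering is an assumption, not the actual definition):

import java.util.Comparator;

// Assumed ordering: by timestamp, with ties broken by body bytes.
// ByteBuffer implements Comparable, and compareTo leaves its position unchanged.
private static final Comparator<StreamEvent> STREAM_EVENT_COMPARATOR =
    new Comparator<StreamEvent>() {
        @Override
        public int compare(StreamEvent a, StreamEvent b) {
            int cmp = Long.compare(a.getTimestamp(), b.getTimestamp());
            return cmp != 0 ? cmp : a.getBody().compareTo(b.getBody());
        }
    };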

Example 34 with StreamEvent

Use of co.cask.cdap.api.flow.flowlet.StreamEvent in project cdap by caskdata.

From the class StreamFetchHandler, the method fetch:

/**
 * Handler for the HTTP API {@code /streams/[stream_name]/events?start=[start_ts]&end=[end_ts]&limit=[event_limit]}
 * <p>
 * Responds with:
 * <ul>
 * <li>404 if stream does not exist</li>
 * <li>204 if no event in the given start/end time range exists</li>
 * <li>200 if there are one or more events</li>
 * </ul>
 * </p>
 * <p>
 * Response body is a JSON array of the StreamEvent object.
 * </p>
 *
 * @see StreamEventTypeAdapter StreamEventTypeAdapter for the format of the StreamEvent object
 */
@GET
@Path("/{stream}/events")
public void fetch(HttpRequest request, final HttpResponder responder, @PathParam("namespace-id") String namespaceId, @PathParam("stream") String stream, @QueryParam("start") @DefaultValue("0") String start, @QueryParam("end") @DefaultValue("9223372036854775807") String end, @QueryParam("limit") @DefaultValue("2147483647") final int limitEvents) throws Exception {
    long startTime = TimeMathParser.parseTime(start, TimeUnit.MILLISECONDS);
    long endTime = TimeMathParser.parseTime(end, TimeUnit.MILLISECONDS);
    StreamId streamId = new StreamId(namespaceId, stream);
    if (!verifyGetEventsRequest(streamId, startTime, endTime, limitEvents, responder)) {
        return;
    }
    // Make sure the user has READ permission on the stream, since getConfig doesn't check for it.
    authorizationEnforcer.enforce(streamId, authenticationContext.getPrincipal(), Action.READ);
    final StreamConfig streamConfig = streamAdmin.getConfig(streamId);
    long now = System.currentTimeMillis();
    startTime = Math.max(startTime, now - streamConfig.getTTL());
    endTime = Math.min(endTime, now);
    final long streamStartTime = startTime;
    final long streamEndTime = endTime;
    impersonator.doAs(streamId, new Callable<Void>() {

        @Override
        public Void call() throws Exception {
            int limit = limitEvents;
            // Create the stream event reader
            try (FileReader<StreamEventOffset, Iterable<StreamFileOffset>> reader = createReader(streamConfig, streamStartTime)) {
                TimeRangeReadFilter readFilter = new TimeRangeReadFilter(streamStartTime, streamEndTime);
                List<StreamEvent> events = Lists.newArrayListWithCapacity(100);
                // Reads the first batch of events from the stream.
                int eventsRead = readEvents(reader, events, limit, readFilter);
                // If empty already, return 204 no content
                if (eventsRead <= 0) {
                    responder.sendStatus(HttpResponseStatus.NO_CONTENT);
                    return null;
                }
                // Send a chunked response, since we don't want to buffer all events in memory to determine the content length.
                ChunkResponder chunkResponder = responder.sendChunkStart(HttpResponseStatus.OK, new DefaultHttpHeaders().set(HttpHeaderNames.CONTENT_TYPE, "application/json; charset=utf-8"));
                ByteBuf buffer = Unpooled.buffer();
                JsonWriter jsonWriter = new JsonWriter(new OutputStreamWriter(new ByteBufOutputStream(buffer), StandardCharsets.UTF_8));
                // The response is a JSON array of stream events
                jsonWriter.beginArray();
                while (limit > 0 && eventsRead > 0) {
                    limit -= eventsRead;
                    for (StreamEvent event : events) {
                        GSON.toJson(event, StreamEvent.class, jsonWriter);
                        jsonWriter.flush();
                        // If exceeded chunk size limit, send a new chunk.
                        if (buffer.readableBytes() >= CHUNK_SIZE) {
                            // If the connection is closed, sendChunk will throw an IOException.
                            // There's no need to handle it; it will just be propagated back to the
                            // netty-http library, which will handle it.
                            // The buffer must be copied because it gets reused and sendChunk is an async operation.
                            chunkResponder.sendChunk(buffer.copy());
                            buffer.clear();
                        }
                    }
                    events.clear();
                    if (limit > 0) {
                        eventsRead = readEvents(reader, events, limit, readFilter);
                    }
                }
                jsonWriter.endArray();
                jsonWriter.close();
                // Send the last chunk that still has data
                if (buffer.isReadable()) {
                    // No need to copy the last chunk, since the buffer will not be reused
                    chunkResponder.sendChunk(buffer);
                }
                Closeables.closeQuietly(chunkResponder);
            }
            return null;
        }
    });
}
Also used: StreamId (co.cask.cdap.proto.id.StreamId), ByteBufOutputStream (io.netty.buffer.ByteBufOutputStream), StreamEvent (co.cask.cdap.api.flow.flowlet.StreamEvent), StreamConfig (co.cask.cdap.data2.transaction.stream.StreamConfig), ByteBuf (io.netty.buffer.ByteBuf), JsonWriter (com.google.gson.stream.JsonWriter), IOException (java.io.IOException), DefaultHttpHeaders (io.netty.handler.codec.http.DefaultHttpHeaders), FileReader (co.cask.cdap.data.file.FileReader), MultiLiveStreamFileReader (co.cask.cdap.data.stream.MultiLiveStreamFileReader), List (java.util.List), OutputStreamWriter (java.io.OutputStreamWriter), TimeRangeReadFilter (co.cask.cdap.data.stream.TimeRangeReadFilter), StreamFileOffset (co.cask.cdap.data.stream.StreamFileOffset), ChunkResponder (co.cask.http.ChunkResponder), Path (javax.ws.rs.Path), GET (javax.ws.rs.GET)
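
The readEvents helper is not shown in this snippet. A plausible sketch, assuming FileReader offers a read overload that accepts a ReadFilter and returns the number of events read (negative once the stream is exhausted), with the batch size capped to match the list capacity above:

// Hedged sketch of readEvents: fill 'events' with up to min(limit, 100)
// events that pass the time-range filter, without waiting for live data.
private int readEvents(FileReader<StreamEventOffset, Iterable<StreamFileOffset>> reader,
                       List<StreamEvent> events, int limit,
                       TimeRangeReadFilter readFilter) throws IOException, InterruptedException {
    return reader.read(events, Math.min(limit, 100), 0, TimeUnit.SECONDS, readFilter);
}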

Example 35 with StreamEvent

Use of co.cask.cdap.api.flow.flowlet.StreamEvent in project cdap by caskdata.

From the class StreamTailer, the method main:

public static void main(String[] args) throws Exception {
    if (args.length < 1) {
        System.out.println(String.format("Usage: java %s [streamName]", StreamTailer.class.getName()));
        return;
    }
    String streamName = args[0];
    CConfiguration cConf = CConfiguration.create();
    Configuration hConf = new Configuration();
    String txClientId = StreamTailer.class.getName();
    Injector injector = Guice.createInjector(new ConfigModule(cConf, hConf), new DataFabricModules(txClientId).getDistributedModules(), new DataSetsModules().getDistributedModules(), new LocationRuntimeModule().getDistributedModules(), new ExploreClientModule(), new ViewAdminModules().getDistributedModules(), new StreamAdminModules().getDistributedModules(), new AuthorizationEnforcementModule().getDistributedModules(), new AuthenticationContextModules().getMasterModule(), new NotificationFeedClientModule());
    StreamAdmin streamAdmin = injector.getInstance(StreamAdmin.class);
    // TODO: get the namespace from command-line arguments
    StreamId streamId = NamespaceId.DEFAULT.stream(streamName);
    StreamConfig streamConfig = streamAdmin.getConfig(streamId);
    Location streamLocation = streamConfig.getLocation();
    List<Location> eventFiles = Lists.newArrayList();
    for (Location partition : streamLocation.list()) {
        if (!partition.isDirectory()) {
            continue;
        }
        for (Location file : partition.list()) {
            if (StreamFileType.EVENT.isMatched(file.getName())) {
                eventFiles.add(file);
            }
        }
    }
    int generation = StreamUtils.getGeneration(streamConfig);
    MultiLiveStreamFileReader reader = new MultiLiveStreamFileReader(streamConfig, ImmutableList.copyOf(Iterables.transform(eventFiles, createOffsetConverter(generation))));
    List<StreamEvent> events = Lists.newArrayList();
    while (reader.read(events, 10, 100, TimeUnit.MILLISECONDS) >= 0) {
        for (StreamEvent event : events) {
            System.out.println(event.getTimestamp() + " " + Charsets.UTF_8.decode(event.getBody()));
        }
        events.clear();
    }
    reader.close();
}
Also used: StreamId (co.cask.cdap.proto.id.StreamId), CConfiguration (co.cask.cdap.common.conf.CConfiguration), Configuration (org.apache.hadoop.conf.Configuration), ConfigModule (co.cask.cdap.common.guice.ConfigModule), AuthenticationContextModules (co.cask.cdap.security.auth.context.AuthenticationContextModules), DataSetsModules (co.cask.cdap.data.runtime.DataSetsModules), StreamEvent (co.cask.cdap.api.flow.flowlet.StreamEvent), LocationRuntimeModule (co.cask.cdap.common.guice.LocationRuntimeModule), StreamConfig (co.cask.cdap.data2.transaction.stream.StreamConfig), ViewAdminModules (co.cask.cdap.data.view.ViewAdminModules), StreamAdmin (co.cask.cdap.data2.transaction.stream.StreamAdmin), ExploreClientModule (co.cask.cdap.explore.guice.ExploreClientModule), Injector (com.google.inject.Injector), NotificationFeedClientModule (co.cask.cdap.notifications.feeds.client.NotificationFeedClientModule), DataFabricModules (co.cask.cdap.data.runtime.DataFabricModules), AuthorizationEnforcementModule (co.cask.cdap.security.authorization.AuthorizationEnforcementModule), Location (org.apache.twill.filesystem.Location)
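
createOffsetConverter is a private helper not included in this snippet. A hedged sketch, assuming StreamFileOffset exposes a (Location, offset, generation) constructor, so that each event file is read from its beginning at the stream's current generation:

import com.google.common.base.Function;
import co.cask.cdap.data.stream.StreamFileOffset;

// Hedged sketch: seed the reader at offset 0 of each event file.
private static Function<Location, StreamFileOffset> createOffsetConverter(final int generation) {
    return new Function<Location, StreamFileOffset>() {
        @Override
        public StreamFileOffset apply(Location eventFile) {
            return new StreamFileOffset(eventFile, 0L, generation);
        }
    };
}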

Aggregations

StreamEvent (co.cask.cdap.api.flow.flowlet.StreamEvent): 84
Test (org.junit.Test): 65
Location (org.apache.twill.filesystem.Location): 27
StreamId (co.cask.cdap.proto.id.StreamId): 24
StructuredRecord (co.cask.cdap.api.data.format.StructuredRecord): 19
FormatSpecification (co.cask.cdap.api.data.format.FormatSpecification): 17
Schema (co.cask.cdap.api.data.schema.Schema): 10
IOException (java.io.IOException): 9
StreamConfig (co.cask.cdap.data2.transaction.stream.StreamConfig): 8
ByteBuffer (java.nio.ByteBuffer): 8
ConsumerConfig (co.cask.cdap.data2.queue.ConsumerConfig): 7
StreamAdmin (co.cask.cdap.data2.transaction.stream.StreamAdmin): 6
TransactionContext (org.apache.tephra.TransactionContext): 6
BinaryDecoder (co.cask.cdap.common.io.BinaryDecoder): 5
TypeToken (com.google.common.reflect.TypeToken): 5
StreamEventCodec (co.cask.cdap.common.stream.StreamEventCodec): 4
IdentityStreamEventDecoder (co.cask.cdap.data.stream.decoder.IdentityStreamEventDecoder): 4
File (java.io.File): 4
SchemaHash (co.cask.cdap.api.data.schema.SchemaHash): 3
QueueName (co.cask.cdap.common.queue.QueueName): 3