Example 51 with StreamId

use of co.cask.cdap.proto.id.StreamId in project cdap by caskdata.

the class StreamConsumerStateTestBase method testChangeInstance.
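
For orientation before the example: a StreamId names a stream within a namespace. The code below builds one from a NamespaceId helper; a minimal sketch of the two equivalent constructions, assuming StreamId's two-argument (namespace, stream) constructor:

// Minimal sketch: two equivalent ways to identify the stream "purchases"
// in the default namespace.
StreamId viaNamespace = NamespaceId.DEFAULT.stream("purchases");
// Assumed (namespace, stream) constructor; equivalent to the helper above.
StreamId direct = new StreamId(NamespaceId.DEFAULT.getNamespace(), "purchases");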

@Test
public void testChangeInstance() throws Exception {
    StreamAdmin streamAdmin = getStreamAdmin();
    String streamName = "testChangeInstance";
    StreamId streamId = TEST_NAMESPACE.stream(streamName);
    streamAdmin.create(streamId);
    StreamConfig config = streamAdmin.getConfig(streamId);
    // Creates a state with 4 offsets
    StreamConsumerState state = generateState(0L, 0, config, 0L, 4);
    StreamConsumerStateStore stateStore = createStateStore(config);
    // Save the state.
    stateStore.save(state);
    // Increase the number of instances
    streamAdmin.configureInstances(streamId, 0L, 2);
    StreamConsumerState newState = stateStore.get(0L, 1);
    // Get the state of the new instance, should be the same as the existing one
    Assert.assertTrue(Iterables.elementsEqual(state.getState(), newState.getState()));
    // Change the state of instance 0 to higher offset.
    List<StreamFileOffset> fileOffsets = Lists.newArrayList(state.getState());
    StreamFileOffset fileOffset = fileOffsets.get(0);
    long oldOffset = fileOffset.getOffset();
    long newOffset = oldOffset + 100000;
    fileOffsets.set(0, new StreamFileOffset(fileOffset, newOffset));
    state.setState(fileOffsets);
    stateStore.save(state);
    // Verify the change
    state = stateStore.get(0L, 0);
    Assert.assertEquals(newOffset, Iterables.get(state.getState(), 0).getOffset());
    // Increase the number of instances again
    streamAdmin.configureInstances(streamId, 0L, 3);
    // Verify that instance 0's offsets are reset to the lowest
    state = stateStore.get(0L, 0);
    Assert.assertEquals(oldOffset, Iterables.get(state.getState(), 0).getOffset());
    // Verify that no new file offset states are introduced (guards against a past bug in the configureInstances implementation)
    Assert.assertEquals(4, Iterables.size(state.getState()));
    // Verify that all offsets are the same
    List<StreamConsumerState> states = Lists.newArrayList();
    stateStore.getByGroup(0L, states);
    Assert.assertEquals(3, states.size());
    Assert.assertTrue(Iterables.elementsEqual(states.get(0).getState(), states.get(1).getState()));
    Assert.assertTrue(Iterables.elementsEqual(states.get(0).getState(), states.get(2).getState()));
}
Also used: StreamId(co.cask.cdap.proto.id.StreamId) StreamFileOffset(co.cask.cdap.data.stream.StreamFileOffset) Test(org.junit.Test)
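
A note on Example 51: the property under test can be stated as an invariant. After configureInstances changes a group's instance count, every instance reports the same, lowest, file offsets, so no event can be skipped. A minimal standalone check of that invariant, using only the state-store API already shown above:

// Sketch: after scaling, all instance states in group 0 must carry
// identical offsets (the group's lowest ones).
List<StreamConsumerState> all = Lists.newArrayList();
stateStore.getByGroup(0L, all);
for (StreamConsumerState s : all) {
    Assert.assertTrue(Iterables.elementsEqual(all.get(0).getState(), s.getState()));
}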

Example 52 with StreamId

use of co.cask.cdap.proto.id.StreamId in project cdap by caskdata.

the class StreamConsumerTestBase method testTTLStartingFile.

@Category(SlowTests.class)
@Test
public void testTTLStartingFile() throws Exception {
    String stream = "testTTLStartingFile";
    StreamId streamId = TEST_NAMESPACE.stream(stream);
    StreamAdmin streamAdmin = getStreamAdmin();
    // Create stream with ttl of 3 seconds and partition duration of 3 seconds
    final long ttl = TimeUnit.SECONDS.toMillis(3);
    Properties streamProperties = new Properties();
    streamProperties.setProperty(Constants.Stream.TTL, Long.toString(ttl));
    streamProperties.setProperty(Constants.Stream.PARTITION_DURATION, Long.toString(ttl));
    streamAdmin.create(streamId, streamProperties);
    StreamConfig streamConfig = streamAdmin.getConfig(streamId);
    streamAdmin.configureGroups(streamId, ImmutableMap.of(0L, 1, 1L, 1));
    StreamConsumerFactory consumerFactory = getConsumerFactory();
    StreamConsumer consumer = consumerFactory.create(streamId, stream, new ConsumerConfig(0L, 0, 1, DequeueStrategy.FIFO, null));
    StreamConsumer newConsumer;
    Set<StreamEvent> expectedEvents = Sets.newTreeSet(STREAM_EVENT_COMPARATOR);
    try {
        // Create a new consumer for second consumer verification.
        // Need to create the consumer before writing events because in HBase, consumer creation takes a couple of seconds.
        newConsumer = consumerFactory.create(streamId, stream, new ConsumerConfig(1L, 0, 1, DequeueStrategy.FIFO, null));
        // Write 20 events into a partition that will expire after sleeping for the TTL
        writeEvents(streamConfig, "Phase 0 expired event ", 20);
        Thread.sleep(ttl);
        verifyEvents(consumer, expectedEvents);
        // also verify for a new consumer
        try {
            verifyEvents(newConsumer, expectedEvents);
        } finally {
            newConsumer.close();
        }
        // Create a new consumer for second consumer verification (with clean state)
        // Need to create the consumer before writing events because in HBase, consumer creation takes a couple of seconds.
        streamAdmin.configureGroups(streamId, ImmutableMap.of(0L, 1));
        streamAdmin.configureGroups(streamId, ImmutableMap.of(0L, 1, 1L, 1));
        newConsumer = consumerFactory.create(streamId, stream, new ConsumerConfig(1L, 0, 1, DequeueStrategy.FIFO, null));
        // Write 20 events into a partition and read them back immediately; they shouldn't have expired.
        expectedEvents.addAll(writeEvents(streamConfig, "Phase 1 non-expired event ", 20));
        verifyEvents(consumer, expectedEvents);
        // also verify for a new consumer
        try {
            verifyEvents(newConsumer, expectedEvents);
        } finally {
            newConsumer.close();
        }
        // Create a new consumer for second consumer verification (with clean state)
        // Need to create the consumer before writing events because in HBase, consumer creation takes a couple of seconds.
        streamAdmin.configureGroups(streamId, ImmutableMap.of(0L, 1));
        streamAdmin.configureGroups(streamId, ImmutableMap.of(0L, 1, 1L, 1));
        newConsumer = consumerFactory.create(streamId, stream, new ConsumerConfig(1L, 0, 1, DequeueStrategy.FIFO, null));
        // Write 20 events into a partition that will expire after sleeping for the TTL.
        // This writes to a new partition, different from the one used by the first batch.
        // Also, because the test sleeps for the TTL, the previous batch expires as well.
        expectedEvents.clear();
        writeEvents(streamConfig, "Phase 2 expired event ", 20);
        Thread.sleep(ttl);
        verifyEvents(consumer, expectedEvents);
        // also verify for a new consumer
        try {
            verifyEvents(newConsumer, expectedEvents);
        } finally {
            newConsumer.close();
        }
        // Create a new consumer for second consumer verification (with clean state)
        // Need to create the consumer before writing events because in HBase, consumer creation takes a couple of seconds.
        streamAdmin.configureGroups(streamId, ImmutableMap.of(0L, 1));
        streamAdmin.configureGroups(streamId, ImmutableMap.of(0L, 1, 1L, 1));
        newConsumer = consumerFactory.create(streamId, stream, new ConsumerConfig(1L, 0, 1, DequeueStrategy.FIFO, null));
        // Write 20 events into a partition and read them back immediately; they shouldn't expire.
        expectedEvents.addAll(writeEvents(streamConfig, "Phase 3 non-expired event ", 20));
        verifyEvents(consumer, expectedEvents);
        // also verify for a new consumer
        try {
            verifyEvents(newConsumer, expectedEvents);
        } finally {
            newConsumer.close();
        }
        // Should be no more pending events
        expectedEvents.clear();
        verifyEvents(consumer, expectedEvents);
    } finally {
        consumer.close();
    }
}
Also used: StreamId(co.cask.cdap.proto.id.StreamId) StreamEvent(co.cask.cdap.api.flow.flowlet.StreamEvent) ConsumerConfig(co.cask.cdap.data2.queue.ConsumerConfig) Properties(java.util.Properties) Category(org.junit.experimental.categories.Category) Test(org.junit.Test)
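
A note on Example 52: the expiry rule being exercised is purely time-based. An event whose timestamp is older than now minus the stream's TTL falls outside the live window and is skipped by consumers. An illustrative sketch of that rule (not the CDAP implementation):

// Illustrative only: with ttl = 3000 ms, events written before the
// Thread.sleep(ttl) calls above fall outside the live window.
static boolean isExpired(StreamEvent event, long ttlMillis) {
    return event.getTimestamp() < System.currentTimeMillis() - ttlMillis;
}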

Example 53 with StreamId

use of co.cask.cdap.proto.id.StreamId in project cdap by caskdata.

the class StreamConsumerTestBase method testFIFORollback.

@Test
public void testFIFORollback() throws Exception {
    String stream = "testFIFORollback";
    StreamId streamId = TEST_NAMESPACE.stream(stream);
    StreamAdmin streamAdmin = getStreamAdmin();
    streamAdmin.create(streamId);
    StreamConfig streamConfig = streamAdmin.getConfig(streamId);
    // Writes 5 events
    writeEvents(streamConfig, "Testing ", 5);
    streamAdmin.configureInstances(streamId, 0L, 2);
    StreamConsumerFactory consumerFactory = getConsumerFactory();
    StreamConsumer consumer0 = consumerFactory.create(streamId, "fifo.rollback", new ConsumerConfig(0L, 0, 2, DequeueStrategy.FIFO, null));
    StreamConsumer consumer1 = consumerFactory.create(streamId, "fifo.rollback", new ConsumerConfig(0L, 1, 2, DequeueStrategy.FIFO, null));
    // Try to dequeue using both consumers
    TransactionContext context0 = createTxContext(consumer0);
    TransactionContext context1 = createTxContext(consumer1);
    context0.start();
    context1.start();
    DequeueResult<StreamEvent> result0 = consumer0.poll(1, 1, TimeUnit.SECONDS);
    DequeueResult<StreamEvent> result1 = consumer1.poll(1, 1, TimeUnit.SECONDS);
    Assert.assertEquals("Testing 0", Charsets.UTF_8.decode(result0.iterator().next().getBody()).toString());
    Assert.assertEquals("Testing 1", Charsets.UTF_8.decode(result1.iterator().next().getBody()).toString());
    // Commit the first one, rollback the second one.
    context0.finish();
    context1.abort();
    // Dequeue again with both consumers
    context0.start();
    context1.start();
    result0 = consumer0.poll(1, 1, TimeUnit.SECONDS);
    result1 = consumer1.poll(1, 1, TimeUnit.SECONDS);
    // Expect consumer 0 to keep proceeding, while consumer 1 retries what it claimed in the previous transaction.
    // This is a FIFO-mode optimization that avoids going back and rescanning.
    Assert.assertEquals("Testing 2", Charsets.UTF_8.decode(result0.iterator().next().getBody()).toString());
    Assert.assertEquals("Testing 1", Charsets.UTF_8.decode(result1.iterator().next().getBody()).toString());
    // Commit both
    context0.finish();
    context1.finish();
    consumer0.close();
    consumer1.close();
}
Also used: StreamId(co.cask.cdap.proto.id.StreamId) TransactionContext(org.apache.tephra.TransactionContext) StreamEvent(co.cask.cdap.api.flow.flowlet.StreamEvent) ConsumerConfig(co.cask.cdap.data2.queue.ConsumerConfig) Test(org.junit.Test)
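
A note on Example 53: the createTxContext helper comes from the test base class and is not shown. A plausible sketch, assuming the base class holds a Tephra TransactionSystemClient (the txSystemClient field here is hypothetical) and that the StreamConsumer participates as a TransactionAware:

// Hypothetical reconstruction: enlist the consumer in a Tephra
// TransactionContext so start()/finish()/abort() drive its transactional state.
private TransactionContext createTxContext(StreamConsumer consumer) {
    return new TransactionContext(txSystemClient, consumer);
}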

Example 54 with StreamId

use of co.cask.cdap.proto.id.StreamId in project cdap by caskdata.

the class StreamTailer method main.

public static void main(String[] args) throws Exception {
    if (args.length < 1) {
        System.out.println(String.format("Usage: java %s [streamName]", StreamTailer.class.getName()));
        return;
    }
    String streamName = args[0];
    CConfiguration cConf = CConfiguration.create();
    Configuration hConf = new Configuration();
    String txClientId = StreamTailer.class.getName();
    Injector injector = Guice.createInjector(new ConfigModule(cConf, hConf), new DataFabricModules(txClientId).getDistributedModules(), new DataSetsModules().getDistributedModules(), new LocationRuntimeModule().getDistributedModules(), new ExploreClientModule(), new ViewAdminModules().getDistributedModules(), new StreamAdminModules().getDistributedModules(), new AuthorizationEnforcementModule().getDistributedModules(), new AuthenticationContextModules().getMasterModule(), new NotificationFeedClientModule());
    StreamAdmin streamAdmin = injector.getInstance(StreamAdmin.class);
    // TODO: get the namespace from command-line arguments
    StreamId streamId = NamespaceId.DEFAULT.stream(streamName);
    StreamConfig streamConfig = streamAdmin.getConfig(streamId);
    Location streamLocation = streamConfig.getLocation();
    List<Location> eventFiles = Lists.newArrayList();
    for (Location partition : streamLocation.list()) {
        if (!partition.isDirectory()) {
            continue;
        }
        for (Location file : partition.list()) {
            if (StreamFileType.EVENT.isMatched(file.getName())) {
                eventFiles.add(file);
            }
        }
    }
    int generation = StreamUtils.getGeneration(streamConfig);
    MultiLiveStreamFileReader reader = new MultiLiveStreamFileReader(streamConfig, ImmutableList.copyOf(Iterables.transform(eventFiles, createOffsetConverter(generation))));
    List<StreamEvent> events = Lists.newArrayList();
    while (reader.read(events, 10, 100, TimeUnit.MILLISECONDS) >= 0) {
        for (StreamEvent event : events) {
            System.out.println(event.getTimestamp() + " " + Charsets.UTF_8.decode(event.getBody()));
        }
        events.clear();
    }
    reader.close();
}
Also used: StreamId(co.cask.cdap.proto.id.StreamId) CConfiguration(co.cask.cdap.common.conf.CConfiguration) Configuration(org.apache.hadoop.conf.Configuration) ConfigModule(co.cask.cdap.common.guice.ConfigModule) AuthenticationContextModules(co.cask.cdap.security.auth.context.AuthenticationContextModules) DataSetsModules(co.cask.cdap.data.runtime.DataSetsModules) StreamEvent(co.cask.cdap.api.flow.flowlet.StreamEvent) LocationRuntimeModule(co.cask.cdap.common.guice.LocationRuntimeModule) StreamConfig(co.cask.cdap.data2.transaction.stream.StreamConfig) ViewAdminModules(co.cask.cdap.data.view.ViewAdminModules) StreamAdmin(co.cask.cdap.data2.transaction.stream.StreamAdmin) ExploreClientModule(co.cask.cdap.explore.guice.ExploreClientModule) Injector(com.google.inject.Injector) NotificationFeedClientModule(co.cask.cdap.notifications.feeds.client.NotificationFeedClientModule) DataFabricModules(co.cask.cdap.data.runtime.DataFabricModules) AuthorizationEnforcementModule(co.cask.cdap.security.authorization.AuthorizationEnforcementModule) Location(org.apache.twill.filesystem.Location)
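
A note on Example 54: the createOffsetConverter helper is referenced but not shown. A plausible sketch using Guava's Function, assuming a StreamFileOffset(Location, offset, generation) constructor (a hypothetical signature) that starts each event file at offset zero:

// Hypothetical reconstruction: maps each event-file Location to a
// StreamFileOffset at offset 0 for the given generation, matching the
// Iterables.transform call above.
private static Function<Location, StreamFileOffset> createOffsetConverter(final int generation) {
    return new Function<Location, StreamFileOffset>() {
        @Override
        public StreamFileOffset apply(Location eventFile) {
            return new StreamFileOffset(eventFile, 0L, generation);
        }
    };
}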

Example 55 with StreamId

use of co.cask.cdap.proto.id.StreamId in project cdap by caskdata.

the class MapReduceRuntimeService method setDecoderForStream.

private void setDecoderForStream(StreamInputFormatProvider streamProvider, Job job, Map<String, String> inputFormatConfiguration, Class<? extends Mapper> mapperClass) {
    // For streams, we need to do two extra steps:
    // 1. Register stream usage, since that only happens on the client side.
    // 2. Infer the stream event decoder from the Mapper/Reducer.
    TypeToken<?> mapperTypeToken = mapperClass == null ? null : resolveClass(mapperClass, Mapper.class);
    Type inputValueType = getInputValueType(job.getConfiguration(), StreamEvent.class, mapperTypeToken);
    streamProvider.setDecoderType(inputFormatConfiguration, inputValueType);
    StreamId streamId = streamProvider.getStreamId();
    try {
        streamAdmin.register(ImmutableList.of(context.getProgram().getId()), streamId);
        streamAdmin.addAccess(context.getProgram().getId().run(context.getRunId().getId()), streamId, AccessType.READ);
    } catch (Exception e) {
        LOG.warn("Failed to register usage {} -> {}", context.getProgram().getId(), streamId, e);
    }
}
Also used: Mapper(org.apache.hadoop.mapreduce.Mapper) ProgramType(co.cask.cdap.proto.ProgramType) AccessType(co.cask.cdap.data2.metadata.lineage.AccessType) ParameterizedType(java.lang.reflect.ParameterizedType) Type(java.lang.reflect.Type) StreamId(co.cask.cdap.proto.id.StreamId) ProvisionException(com.google.inject.ProvisionException) IOException(java.io.IOException) TransactionFailureException(org.apache.tephra.TransactionFailureException) URISyntaxException(java.net.URISyntaxException) TransactionConflictException(org.apache.tephra.TransactionConflictException)
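
A note on Example 55, to make the decoder inference concrete: getInputValueType resolves the Mapper's second generic type parameter (its input value type). For a hypothetical mapper declared as below, that type is StreamEvent, so setDecoderType configures a decoder that passes events through unchanged:

// Hypothetical mapper: the second type parameter (StreamEvent) is what
// getInputValueType resolves, which drives the decoder choice in
// setDecoderForStream.
public class TokenizeMapper extends Mapper<LongWritable, StreamEvent, Text, Text> {
    // map(...) omitted; only the type signature matters for decoder inference.
}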

Aggregations

StreamId (co.cask.cdap.proto.id.StreamId): 166
Test (org.junit.Test): 88
DatasetId (co.cask.cdap.proto.id.DatasetId): 33
ProgramId (co.cask.cdap.proto.id.ProgramId): 30
NamespaceId (co.cask.cdap.proto.id.NamespaceId): 27
Path (javax.ws.rs.Path): 27
StreamEvent (co.cask.cdap.api.flow.flowlet.StreamEvent): 24
ApplicationId (co.cask.cdap.proto.id.ApplicationId): 22
IOException (java.io.IOException): 20
StreamProperties (co.cask.cdap.proto.StreamProperties): 17
FormatSpecification (co.cask.cdap.api.data.format.FormatSpecification): 16
StreamViewId (co.cask.cdap.proto.id.StreamViewId): 16
Location (org.apache.twill.filesystem.Location): 15
StreamConfig (co.cask.cdap.data2.transaction.stream.StreamConfig): 12
NamespaceMeta (co.cask.cdap.proto.NamespaceMeta): 12
StreamAdmin (co.cask.cdap.data2.transaction.stream.StreamAdmin): 11
ViewSpecification (co.cask.cdap.proto.ViewSpecification): 10
MetadataSearchResultRecord (co.cask.cdap.proto.metadata.MetadataSearchResultRecord): 10
Action (co.cask.cdap.proto.security.Action): 10
GET (javax.ws.rs.GET): 10