Use of co.cask.cdap.data2.transaction.stream.StreamConfig in project cdap by caskdata.
The class HBaseStreamConsumerStateStoreFactory, method create.
@Override
public synchronized StreamConsumerStateStore create(StreamConfig streamConfig) throws IOException {
  NamespaceId namespace = streamConfig.getStreamId().getParent();
  TableId streamStateStoreTableId = StreamUtils.getStateStoreTableId(namespace);
  TableId hbaseTableId = tableUtil.createHTableId(new NamespaceId(streamStateStoreTableId.getNamespace()),
                                                  streamStateStoreTableId.getTableName());
  // Check whether the backing HBase table already exists.
  boolean tableExists;
  try (HBaseAdmin admin = new HBaseAdmin(hConf)) {
    tableExists = tableUtil.tableExists(admin, hbaseTableId);
  }
  // Create the state store table on demand, with the queue entry column family.
  if (!tableExists) {
    try (HBaseDDLExecutor ddlExecutor = ddlExecutorFactory.get()) {
      TableDescriptorBuilder tdBuilder = HBaseTableUtil.getTableDescriptorBuilder(hbaseTableId, cConf);
      ColumnFamilyDescriptorBuilder cfdBuilder =
          HBaseTableUtil.getColumnFamilyDescriptorBuilder(Bytes.toString(QueueEntryRow.COLUMN_FAMILY), hConf);
      tdBuilder.addColumnFamily(cfdBuilder.build());
      ddlExecutor.createTableIfNotExists(tdBuilder.build(), null);
    }
  }
  // Buffer writes client-side; the state store flushes explicitly.
  HTable hTable = tableUtil.createHTable(hConf, hbaseTableId);
  hTable.setWriteBufferSize(Constants.Stream.HBASE_WRITE_BUFFER_SIZE);
  hTable.setAutoFlushTo(false);
  return new HBaseStreamConsumerStateStore(streamConfig, hTable);
}
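The factory provisions the backing HBase table on first use and hands back a store that persists each consumer's read offsets. A minimal usage sketch, assuming a configured factory and an existing consumer group (the variable names mirror the next snippet and are illustrative):

// Sketch only: assumes stateStoreFactory, streamConfig and consumerConfig are available.
StreamConsumerStateStore stateStore = stateStoreFactory.create(streamConfig);
try {
  // Look up the persisted file offsets for this consumer group/instance.
  StreamConsumerState consumerState = stateStore.get(consumerConfig.getGroupId(),
                                                     consumerConfig.getInstanceId());
} finally {
  stateStore.close();
}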
Use of co.cask.cdap.data2.transaction.stream.StreamConfig in project cdap by caskdata.
The class AbstractStreamFileConsumerFactory, method create.
@Override
public final StreamConsumer create(StreamId streamId, String namespace, ConsumerConfig consumerConfig) throws IOException {
  StreamConfig streamConfig = StreamUtils.ensureExists(streamAdmin, streamId);
  TableId tableId = getTableId(streamId, namespace);
  // Restore this consumer's persisted file offsets before constructing the consumer.
  StreamConsumerStateStore stateStore = stateStoreFactory.create(streamConfig);
  StreamConsumerState consumerState = stateStore.get(consumerConfig.getGroupId(), consumerConfig.getInstanceId());
  return create(tableId, streamConfig, consumerConfig, stateStore, consumerState,
                createReader(streamConfig, consumerState), new TTLReadFilter(streamConfig.getTTL()));
}
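For context, the ConsumerConfig argument carries the consumer group id, instance id, group size, dequeue strategy, and an optional hash key. A minimal construction, mirroring the testNamespacedStreamConsumers snippet at the end of this page:

// groupId 0, instanceId 0, groupSize 1, FIFO dequeueing, no hash key.
ConsumerConfig consumerConfig = new ConsumerConfig(0L, 0, 1, DequeueStrategy.FIFO, null);
StreamConsumer consumer = consumerFactory.create(streamId, "fifo.rollback", consumerConfig);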
Use of co.cask.cdap.data2.transaction.stream.StreamConfig in project cdap by caskdata.
The class MultiLiveStreamFileReaderTestBase, method testMultiFileReader.
@Test
public void testMultiFileReader() throws Exception {
  String streamName = "multiReader";
  StreamId streamId = NamespaceId.DEFAULT.stream(streamName);
  Location location = getLocationFactory().create(streamName);
  location.mkdirs();
  // Create a stream with one partition.
  StreamConfig config = new StreamConfig(streamId, Long.MAX_VALUE, 10000, Long.MAX_VALUE, location, null, 1000);
  // Write 1000 events into 5 files (200 per file), with interleaving timestamps.
  List<FileWriter<StreamEvent>> writers = Lists.newArrayList();
  for (int i = 0; i < 5; i++) {
    FileWriter<StreamEvent> writer = createWriter(config, "bucket" + i);
    writers.add(writer);
    for (int j = 0; j < 200; j++) {
      long timestamp = j * 5 + i;
      writer.append(StreamFileTestUtils.createEvent(timestamp, "Testing " + timestamp));
    }
  }
  // Flush all writers.
  for (FileWriter<StreamEvent> writer : writers) {
    writer.flush();
  }
  // Create a multi stream file reader over the five event files.
  List<StreamFileOffset> sources = Lists.newArrayList();
  Location partitionLocation = StreamUtils.createPartitionLocation(config.getLocation(), 0, Long.MAX_VALUE);
  for (int i = 0; i < 5; i++) {
    Location eventFile = StreamUtils.createStreamLocation(partitionLocation, "bucket" + i, 0, StreamFileType.EVENT);
    sources.add(new StreamFileOffset(eventFile, 0L, 0));
  }
  // Read all events written so far; they should come back in timestamp order.
  MultiLiveStreamFileReader reader = new MultiLiveStreamFileReader(config, sources);
  List<StreamEvent> events = Lists.newArrayList();
  long expectedTimestamp = 0L;
  for (int i = 0; i < 10; i++) {
    Assert.assertEquals(100, reader.read(events, 100, 0, TimeUnit.SECONDS));
    Assert.assertEquals(100, events.size());
    for (StreamEvent event : events) {
      Assert.assertEquals(expectedTimestamp, event.getTimestamp());
      Assert.assertEquals("Testing " + expectedTimestamp, Charsets.UTF_8.decode(event.getBody()).toString());
      expectedTimestamp++;
    }
    events.clear();
  }
  Assert.assertEquals(0, reader.read(events, 1, 1, TimeUnit.SECONDS));
  // Write some more events to the first three writers.
  for (int i = 0; i < 3; i++) {
    FileWriter<StreamEvent> writer = writers.get(i);
    for (int j = 0; j < 10; j++) {
      long timestamp = 1000 + j * 3 + i;
      writer.append(StreamFileTestUtils.createEvent(timestamp, "Testing " + timestamp));
    }
  }
  // Close all writers.
  for (FileWriter<StreamEvent> writer : writers) {
    writer.close();
  }
  // Continue reading; the 30 new events should also arrive in timestamp order.
  Assert.assertEquals(30, reader.read(events, 30, 2, TimeUnit.SECONDS));
  Assert.assertEquals(30, events.size());
  for (StreamEvent event : events) {
    Assert.assertEquals(expectedTimestamp, event.getTimestamp());
    Assert.assertEquals("Testing " + expectedTimestamp, Charsets.UTF_8.decode(event.getBody()).toString());
    expectedTimestamp++;
  }
  // Should get no more events.
  Assert.assertEquals(0, reader.read(events, 1, 1, TimeUnit.SECONDS));
  reader.close();
}
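The merged read order is exactly 0, 1, 2, … because writer i emits timestamps i, 5 + i, 10 + i, …, so across the five writers every value in [0, 1000) occurs exactly once. A tiny standalone check of that arithmetic:

// Standalone sanity check of the timestamp interleaving used above.
boolean[] seen = new boolean[1000];
for (int i = 0; i < 5; i++) {
  for (int j = 0; j < 200; j++) {
    seen[j * 5 + i] = true;
  }
}
for (boolean b : seen) {
  Assert.assertTrue(b); // every timestamp in [0, 1000) is produced exactly once
}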
Use of co.cask.cdap.data2.transaction.stream.StreamConfig in project cdap by caskdata.
The class StreamDataFileTestBase, method testLiveStream.
/**
 * Tests the live stream reader as new partitions and/or stream files are created over time.
 */
@Category(SlowTests.class)
@Test
public void testLiveStream() throws Exception {
  String streamName = "live";
  StreamId streamId = NamespaceId.DEFAULT.stream(streamName);
  final String filePrefix = "prefix";
  // Partition duration of 5 seconds.
  long partitionDuration = 5000;
  Location location = getLocationFactory().create(streamName);
  location.mkdirs();
  final StreamConfig config = new StreamConfig(streamId, partitionDuration, 10000, Long.MAX_VALUE, location, null, 1000);
  // Create a thread that writes 10 events per second.
  final AtomicInteger eventsWritten = new AtomicInteger();
  final List<Closeable> closeables = Lists.newArrayList();
  Thread writerThread = new Thread() {
    @Override
    public void run() {
      try {
        while (!interrupted()) {
          FileWriter<StreamEvent> writer = createWriter(config, filePrefix);
          closeables.add(writer);
          for (int i = 0; i < 10; i++) {
            long ts = System.currentTimeMillis();
            writer.append(StreamFileTestUtils.createEvent(ts, "Testing"));
            eventsWritten.getAndIncrement();
          }
          writer.flush();
          TimeUnit.SECONDS.sleep(1);
        }
      } catch (IOException e) {
        LOG.error(e.getMessage(), e);
        throw Throwables.propagate(e);
      } catch (InterruptedException e) {
        // No-op: the thread is being shut down.
      }
    }
  };
  // Create a live reader starting one partition earlier than the current time.
  long partitionStart = StreamUtils.getPartitionStartTime(System.currentTimeMillis() - config.getPartitionDuration(),
                                                          config.getPartitionDuration());
  Location partitionLocation = StreamUtils.createPartitionLocation(config.getLocation(), partitionStart,
                                                                   config.getPartitionDuration());
  Location eventLocation = StreamUtils.createStreamLocation(partitionLocation, filePrefix, 0, StreamFileType.EVENT);
  // Create a live stream reader that checks for new stream files every 100 milliseconds.
  FileReader<PositionStreamEvent, StreamFileOffset> reader =
      new LiveStreamFileReader(config, new StreamFileOffset(eventLocation, 0L, 0), 100);
  List<StreamEvent> events = Lists.newArrayList();
  // Try to read; since the writer thread has not started yet, it should get nothing.
  Assert.assertEquals(0, reader.read(events, 1, 2, TimeUnit.SECONDS));
  // Start the writer thread.
  writerThread.start();
  Stopwatch stopwatch = new Stopwatch();
  stopwatch.start();
  while (stopwatch.elapsedTime(TimeUnit.SECONDS) < 10 && reader.read(events, 1, 1, TimeUnit.SECONDS) == 0) {
    // Empty
  }
  stopwatch.stop();
  // Should be able to read an event.
  Assert.assertEquals(1, events.size());
  TimeUnit.MILLISECONDS.sleep(partitionDuration * 2);
  writerThread.interrupt();
  writerThread.join();
  LOG.info("Writer stopped with {} events written.", eventsWritten.get());
  // Restart the stopwatch so the 10-second timeout below actually applies
  // (Stopwatch.reset() leaves the watch stopped).
  stopwatch.reset();
  stopwatch.start();
  while (stopwatch.elapsedTime(TimeUnit.SECONDS) < 10 && events.size() != eventsWritten.get()) {
    reader.read(events, eventsWritten.get(), 0, TimeUnit.SECONDS);
  }
  // Should see all events written.
  Assert.assertEquals(eventsWritten.get(), events.size());
  // Take a snapshot of the offset.
  StreamFileOffset offset = new StreamFileOffset(reader.getPosition());
  reader.close();
  for (Closeable c : closeables) {
    Closeables.closeQuietly(c);
  }
  // Now create a new writer to write 10 more events across two partitions, with a pause
  // long enough to land the second batch in a later partition.
  try (FileWriter<StreamEvent> writer = createWriter(config, filePrefix)) {
    for (int i = 0; i < 5; i++) {
      long ts = System.currentTimeMillis();
      writer.append(StreamFileTestUtils.createEvent(ts, "Testing " + ts));
    }
    TimeUnit.MILLISECONDS.sleep(partitionDuration * 3 / 2);
    for (int i = 0; i < 5; i++) {
      long ts = System.currentTimeMillis();
      writer.append(StreamFileTestUtils.createEvent(ts, "Testing " + ts));
    }
  }
  // Create a new reader resuming from the previous offset.
  reader = new LiveStreamFileReader(config, offset, 100);
  events.clear();
  stopwatch.reset();
  stopwatch.start();
  while (stopwatch.elapsedTime(TimeUnit.SECONDS) < 10 && events.size() != 10) {
    reader.read(events, 10, 0, TimeUnit.SECONDS);
  }
  Assert.assertEquals(10, events.size());
  // Try to read more; should get nothing.
  reader.read(events, 10, 2, TimeUnit.SECONDS);
  reader.close();
  for (Closeable c : closeables) {
    c.close();
  }
}
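The partition math the test leans on is a floor to the partition boundary: presumably StreamUtils.getPartitionStartTime behaves roughly like the sketch below (an assumption for illustration, not the library source).

// Presumed behavior of StreamUtils.getPartitionStartTime: floor the timestamp
// to the start of the partition that contains it.
long getPartitionStartTime(long timestamp, long partitionDuration) {
  return timestamp - (timestamp % partitionDuration);
}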
Use of co.cask.cdap.data2.transaction.stream.StreamConfig in project cdap by caskdata.
The class StreamConsumerTestBase, method testNamespacedStreamConsumers.
@Test
public void testNamespacedStreamConsumers() throws Exception {
  // Test two consumers for two streams with the same name but in different namespaces.
  // Their consumption should be independent of each other.
  String stream = "testNamespacedStreamConsumers";
  StreamId streamId = TEST_NAMESPACE.stream(stream);
  StreamId otherStreamId = OTHER_NAMESPACE.stream(stream);
  StreamAdmin streamAdmin = getStreamAdmin();
  streamAdmin.create(streamId);
  streamAdmin.create(otherStreamId);
  StreamConfig streamConfig = streamAdmin.getConfig(streamId);
  StreamConfig otherStreamConfig = streamAdmin.getConfig(otherStreamId);
  // Write 5 events to each stream.
  writeEvents(streamConfig, "Testing ", 5);
  writeEvents(otherStreamConfig, "Testing ", 5);
  streamAdmin.configureInstances(streamId, 0L, 1);
  streamAdmin.configureInstances(otherStreamId, 0L, 1);
  StreamConsumerFactory consumerFactory = getConsumerFactory();
  StreamConsumer consumer = consumerFactory.create(streamId, "fifo.rollback",
                                                   new ConsumerConfig(0L, 0, 1, DequeueStrategy.FIFO, null));
  StreamConsumer otherConsumer = consumerFactory.create(otherStreamId, "fifo.rollback",
                                                        new ConsumerConfig(0L, 0, 1, DequeueStrategy.FIFO, null));
  // Dequeue using both consumers, each inside its own transaction.
  TransactionContext context = createTxContext(consumer);
  TransactionContext otherContext = createTxContext(otherConsumer);
  context.start();
  otherContext.start();
  // Consume three events from the stream in the first namespace.
  DequeueResult<StreamEvent> result0 = consumer.poll(1, 1, TimeUnit.SECONDS);
  Assert.assertEquals("Testing 0", Charsets.UTF_8.decode(result0.iterator().next().getBody()).toString());
  context.finish();
  context.start();
  result0 = consumer.poll(1, 1, TimeUnit.SECONDS);
  Assert.assertEquals("Testing 1", Charsets.UTF_8.decode(result0.iterator().next().getBody()).toString());
  context.finish();
  context.start();
  result0 = consumer.poll(1, 1, TimeUnit.SECONDS);
  Assert.assertEquals("Testing 2", Charsets.UTF_8.decode(result0.iterator().next().getBody()).toString());
  context.finish();
  context.start();
  // Even though a stream with the same name has already had 3 events consumed, otherConsumer
  // reads a stream in a different namespace, so it is still on the initial event.
  DequeueResult<StreamEvent> result1 = otherConsumer.poll(1, 1, TimeUnit.SECONDS);
  Assert.assertEquals("Testing 0", Charsets.UTF_8.decode(result1.iterator().next().getBody()).toString());
  otherContext.finish();
  otherContext.start();
  result0 = consumer.poll(1, 1, TimeUnit.SECONDS);
  result1 = otherConsumer.poll(1, 1, TimeUnit.SECONDS);
  Assert.assertEquals("Testing 3", Charsets.UTF_8.decode(result0.iterator().next().getBody()).toString());
  Assert.assertEquals("Testing 1", Charsets.UTF_8.decode(result1.iterator().next().getBody()).toString());
  // Commit both transactions.
  context.finish();
  otherContext.finish();
  consumer.close();
  otherConsumer.close();
}
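The repeated start/poll/assert/finish blocks above all follow a single transactional-dequeue pattern; a compact hypothetical helper capturing it (not part of the original test) could read:

// Hypothetical helper: dequeue a single event body inside its own transaction.
private String pollOne(TransactionContext context, StreamConsumer consumer) throws Exception {
  context.start();
  DequeueResult<StreamEvent> result = consumer.poll(1, 1, TimeUnit.SECONDS);
  String body = Charsets.UTF_8.decode(result.iterator().next().getBody()).toString();
  context.finish();
  return body;
}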