Search in sources :

Example 81 with StreamEvent

use of co.cask.cdap.api.flow.flowlet.StreamEvent in project cdap by caskdata.

In the class StreamDataFileTestBase, the method testTTLFilter.

@Test
public void testTTLFilter() throws IOException, InterruptedException {
    // Test the TTL filter by writing events with different timestamps and using the TTL to control
    // which events are read back.
    Location dir = StreamFileTestUtils.createTempDir(getLocationFactory());
    Location eventFile = dir.getTempFile(".dat");
    Location indexFile = dir.getTempFile(".idx");
    // Write 10 events with 10 different timestamps, differing by 5, starting from 1.
    // ts = {1, 6, 11, 16, 21, 26, 31, 36, 41, 46 }
    StreamDataFileWriter writer = new StreamDataFileWriter(Locations.newOutputSupplier(eventFile), Locations.newOutputSupplier(indexFile), 20L);
    long ts = 1L;
    for (int i = 0; i < 10; i++, ts += 5) {
        writer.append(StreamFileTestUtils.createEvent(ts, "Testing " + i));
    }
    // Just flush the writer; keep the writer alive to append more events further below.
    writer.flush();
    List<StreamEvent> events = Lists.newArrayList();
    // Create a reader
    try (StreamDataFileReader reader = StreamDataFileReader.createByStartTime(Locations.newInputSupplier(eventFile), Locations.newInputSupplier(indexFile), 0L)) {
        // Read with a TTL filter. The TTL makes the first valid event the one with TS >= 25, hence TS == 26.
        reader.read(events, 1, 0, TimeUnit.SECONDS, new TTLReadFilter(0) {

            @Override
            protected long getCurrentTime() {
                return 25L;
            }
        });
        Assert.assertEquals(1, events.size());
        Assert.assertEquals(26L, events.get(0).getTimestamp());
        // Read with a TTL filter that will skip all remaining events in the stream (TTL = 0,
        // current time = wall clock, so every event written above has already expired).
        events.clear();
        reader.read(events, 1, 0, TimeUnit.SECONDS, new TTLReadFilter(0));
        Assert.assertTrue(events.isEmpty());
        // Write 5 more events. ts was left at 51 by the first loop, so TS = {51, 56, 61, 66, 71}.
        for (int i = 0; i < 5; i++, ts += 5) {
            writer.append(StreamFileTestUtils.createEvent(ts, "Testing " + i));
        }
        writer.close();
        // Read with a TTL filter that makes only the last event pass (TS = 71).
        events.clear();
        reader.read(events, 10, 0, TimeUnit.SECONDS, new TTLReadFilter(0) {

            @Override
            protected long getCurrentTime() {
                return 71L;
            }
        });
        Assert.assertEquals(1, events.size());
        Assert.assertEquals(71L, events.get(0).getTimestamp());
    }
}
Also used : StreamEvent(co.cask.cdap.api.flow.flowlet.StreamEvent) TTLReadFilter(co.cask.cdap.data.file.filter.TTLReadFilter) Location(org.apache.twill.filesystem.Location) Test(org.junit.Test)

Example 82 with StreamEvent

use of co.cask.cdap.api.flow.flowlet.StreamEvent in project cdap by caskdata.

In the class TimePartitionedStreamTestBase, the method testAppendAll.

@Test
public void testAppendAll() throws IOException {
    // Exercise appendAll() against a writer that partitions events into 1-second windows.
    String streamName = "testAppendAll";
    Location streamLocation = getLocationFactory().create(streamName);
    streamLocation.mkdirs();
    TimePartitionedStreamFileWriter writer =
        new TimePartitionedStreamFileWriter(streamLocation, 1000, "file", 100,
                                            new StreamId(NamespaceId.DEFAULT.getNamespace(), streamName), impersonator);
    // Build 2 events per millisecond for 3000 ms, starting at t = 500.
    long timeBase = 500;
    List<StreamEvent> events = Lists.newArrayList();
    for (long offset = 0; offset < 3000; offset++) {
        long timestamp = timeBase + offset;
        events.add(StreamFileTestUtils.createEvent(timestamp, "Testing " + offset + " 0"));
        events.add(StreamFileTestUtils.createEvent(timestamp, "Testing " + offset + " 1"));
    }
    writer.appendAll(events.iterator());
    writer.close();
    // Timestamps span [500, 3500), so four 1-second partition directories must exist
    // (covering events 500-1000, 1000-2000, 2000-3000, 3000-3500).
    List<Location> partitionDirs = Lists.newArrayList(streamLocation.list());
    Assert.assertEquals(4, partitionDirs.size());
    // Sorted by location, the partition start times should be 0, 1000, 2000, 3000.
    Collections.sort(partitionDirs, Locations.LOCATION_COMPARATOR);
    for (int idx = 0; idx < 4; idx++) {
        Assert.assertEquals(idx * 1000, StreamUtils.getPartitionStartTime(partitionDirs.get(idx).getName()));
    }
}
Also used : StreamId(co.cask.cdap.proto.id.StreamId) StreamEvent(co.cask.cdap.api.flow.flowlet.StreamEvent) Location(org.apache.twill.filesystem.Location) Test(org.junit.Test)

Example 83 with StreamEvent

use of co.cask.cdap.api.flow.flowlet.StreamEvent in project cdap by caskdata.

In the class StreamEventTypeAdapterTest, the method testAdapter.

@Test
public void testAdapter() throws IOException {
    // Round-trip a batch of StreamEvents through the Gson type adapter and verify
    // the decoded list is equivalent to the original.
    Gson gson = StreamEventTypeAdapter.register(new GsonBuilder()).create();
    List<StreamEvent> original = Lists.newArrayList();
    for (int i = 0; i < 10; i++) {
        original.add(new StreamEvent(ImmutableMap.of("k" + i, "v" + i), Charsets.UTF_8.encode("Msg " + i), i));
    }
    String json = gson.toJson(original);
    List<StreamEvent> roundTripped = gson.fromJson(json, new TypeToken<List<StreamEvent>>() {
    }.getType());
    // Compare element-wise while both sides still have content.
    Iterator<StreamEvent> expected = original.iterator();
    Iterator<StreamEvent> actual = roundTripped.iterator();
    while (expected.hasNext() && actual.hasNext()) {
        Assert.assertTrue(STREAM_EVENT_EQUIVALENCE.equivalent(expected.next(), actual.next()));
    }
    // Both iterators must be exhausted at the same time, i.e. the lists have equal length.
    Assert.assertEquals(expected.hasNext(), actual.hasNext());
}
Also used : GsonBuilder(com.google.gson.GsonBuilder) TypeToken(com.google.common.reflect.TypeToken) StreamEvent(co.cask.cdap.api.flow.flowlet.StreamEvent) Gson(com.google.gson.Gson) Test(org.junit.Test)

Example 84 with StreamEvent

use of co.cask.cdap.api.flow.flowlet.StreamEvent in project cdap by caskdata.

In the class ASMDatumCodecTest, the method testStreamEvent.

@Test
public void testStreamEvent() throws IOException, UnsupportedTypeException {
    // Encode a single StreamEvent with the generated DatumWriter and read it back
    // with the reflection-based reader; headers and body must survive the round trip.
    TypeToken<StreamEvent> type = new TypeToken<StreamEvent>() {
    };
    PipedOutputStream output = new PipedOutputStream();
    PipedInputStream input = new PipedInputStream(output);
    StreamEvent original = new StreamEvent(ImmutableMap.of("key", "value"), ByteBuffer.wrap("Testing message".getBytes(Charsets.UTF_8)));
    DatumWriter<StreamEvent> datumWriter = getWriter(type);
    datumWriter.encode(original, new BinaryEncoder(output));
    ReflectionDatumReader<StreamEvent> datumReader = new ReflectionDatumReader<>(getSchema(type), type);
    StreamEvent decoded = datumReader.read(new BinaryDecoder(input), getSchema(type));
    Assert.assertEquals(original.getHeaders(), decoded.getHeaders());
    Assert.assertEquals(original.getBody(), decoded.getBody());
}
Also used : BinaryEncoder(co.cask.cdap.common.io.BinaryEncoder) TypeToken(com.google.common.reflect.TypeToken) StreamEvent(co.cask.cdap.api.flow.flowlet.StreamEvent) PipedOutputStream(java.io.PipedOutputStream) ReflectionDatumReader(co.cask.cdap.internal.io.ReflectionDatumReader) PipedInputStream(java.io.PipedInputStream) BinaryDecoder(co.cask.cdap.common.io.BinaryDecoder) Test(org.junit.Test)

Aggregations

StreamEvent (co.cask.cdap.api.flow.flowlet.StreamEvent)84 Test (org.junit.Test)65 Location (org.apache.twill.filesystem.Location)27 StreamId (co.cask.cdap.proto.id.StreamId)24 StructuredRecord (co.cask.cdap.api.data.format.StructuredRecord)19 FormatSpecification (co.cask.cdap.api.data.format.FormatSpecification)17 Schema (co.cask.cdap.api.data.schema.Schema)10 IOException (java.io.IOException)9 StreamConfig (co.cask.cdap.data2.transaction.stream.StreamConfig)8 ByteBuffer (java.nio.ByteBuffer)8 ConsumerConfig (co.cask.cdap.data2.queue.ConsumerConfig)7 StreamAdmin (co.cask.cdap.data2.transaction.stream.StreamAdmin)6 TransactionContext (org.apache.tephra.TransactionContext)6 BinaryDecoder (co.cask.cdap.common.io.BinaryDecoder)5 TypeToken (com.google.common.reflect.TypeToken)5 StreamEventCodec (co.cask.cdap.common.stream.StreamEventCodec)4 IdentityStreamEventDecoder (co.cask.cdap.data.stream.decoder.IdentityStreamEventDecoder)4 File (java.io.File)4 SchemaHash (co.cask.cdap.api.data.schema.SchemaHash)3 QueueName (co.cask.cdap.common.queue.QueueName)3