Example 36 with StreamEvent

Use of co.cask.cdap.api.flow.flowlet.StreamEvent in project cdap by caskdata.

From the class InMemoryStreamFileWriterFactory, method create:

@Override
public FileWriter<StreamEvent> create(StreamConfig config, int generation) throws IOException {
    final QueueProducer producer = queueClientFactory.createProducer(QueueName.fromStream(config.getStreamId()));
    final List<TransactionAware> txAwares = Lists.newArrayList();
    if (producer instanceof TransactionAware) {
        txAwares.add((TransactionAware) producer);
    }
    final TransactionExecutor txExecutor = executorFactory.createExecutor(txAwares);
    // Adapt the QueueProducer into the FileWriter interface
    return new FileWriter<StreamEvent>() {

        private final List<StreamEvent> events = Lists.newArrayList();

        @Override
        public void append(StreamEvent event) throws IOException {
            events.add(event);
        }

        @Override
        public void appendAll(Iterator<? extends StreamEvent> events) throws IOException {
            Iterators.addAll(this.events, events);
        }

        @Override
        public void close() throws IOException {
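            // NOTE: buffered events are not flushed here; callers must call flush()
            // before close(), or events still sitting in the in-memory list are lost.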
            producer.close();
        }

        @Override
        public void flush() throws IOException {
            try {
                txExecutor.execute(new TransactionExecutor.Subroutine() {

                    @Override
                    public void apply() throws Exception {
                        for (StreamEvent event : events) {
                            producer.enqueue(new QueueEntry(STREAM_EVENT_CODEC.encodePayload(event)));
                        }
                        events.clear();
                    }
                });
            } catch (TransactionFailureException e) {
                throw new IOException(e);
            } catch (InterruptedException e) {
                Thread.currentThread().interrupt();
                throw new InterruptedIOException();
            }
        }
    };
}
Also used: InterruptedIOException(java.io.InterruptedIOException) IOException(java.io.IOException) FileWriter(co.cask.cdap.data.file.FileWriter) StreamEvent(co.cask.cdap.api.flow.flowlet.StreamEvent) TransactionExecutor(org.apache.tephra.TransactionExecutor) TransactionFailureException(org.apache.tephra.TransactionFailureException) TransactionAware(org.apache.tephra.TransactionAware) QueueEntry(co.cask.cdap.data2.queue.QueueEntry) QueueProducer(co.cask.cdap.data2.queue.QueueProducer) Iterator(java.util.Iterator) List(java.util.List)
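
A minimal sketch of how a caller might drive the writer this factory returns; the factory and config are assumed to be injected elsewhere, and the sketch's class name is hypothetical. Since this implementation's close() does not flush, flush() must be called explicitly.

import java.io.IOException;
import java.nio.ByteBuffer;
import com.google.common.base.Charsets;
import com.google.common.collect.ImmutableMap;
import co.cask.cdap.api.flow.flowlet.StreamEvent;
import co.cask.cdap.data.file.FileWriter;
import co.cask.cdap.data2.transaction.stream.StreamConfig;

public final class WriterUsageSketch {

    // 'factory' and 'config' are assumed to come from dependency injection.
    static void writeAndFlush(InMemoryStreamFileWriterFactory factory, StreamConfig config) throws IOException {
        FileWriter<StreamEvent> writer = factory.create(config, 0);
        try {
            // append() only buffers the event in memory...
            writer.append(new StreamEvent(ImmutableMap.<String, String>of(),
                                          ByteBuffer.wrap("hello".getBytes(Charsets.UTF_8))));
            // ...flush() enqueues all buffered events inside a single transaction.
            writer.flush();
        } finally {
            writer.close();
        }
    }
}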

Example 37 with StreamEvent

Use of co.cask.cdap.api.flow.flowlet.StreamEvent in project cdap by caskdata.

From the class TestFrameworkTestRun, method testBatchStreamUpload:

@Test
public void testBatchStreamUpload() throws Exception {
    StreamManager batchStream = getStreamManager("batchStream");
    batchStream.createStream();
    String event1 = "this,is,some";
    String event2 = "test,csv,data";
    String event3 = "that,can,be,used,to,test";
    String event4 = "batch,upload,capability";
    String event5 = "for,streams in testbase";
    File testData = TEMP_FOLDER.newFile("test-stream-data.txt");
    try (FileWriter fileWriter = new FileWriter(testData);
        BufferedWriter out = new BufferedWriter(fileWriter)) {
        out.write(String.format("%s\n", event1));
        out.write(String.format("%s\n", event2));
        out.write(String.format("%s\n", event3));
        out.write(String.format("%s\n", event4));
        out.write(String.format("%s\n", event5));
    }
    // Batch upload the file containing 5 events
    batchStream.send(testData, "text/csv");
    // Verify upload
    List<StreamEvent> uploadedEvents = batchStream.getEvents(0, System.currentTimeMillis(), 100);
    Assert.assertEquals(5, uploadedEvents.size());
    Assert.assertEquals(event1, Bytes.toString(uploadedEvents.get(0).getBody()));
    Assert.assertEquals(event2, Bytes.toString(uploadedEvents.get(1).getBody()));
    Assert.assertEquals(event3, Bytes.toString(uploadedEvents.get(2).getBody()));
    Assert.assertEquals(event4, Bytes.toString(uploadedEvents.get(3).getBody()));
    Assert.assertEquals(event5, Bytes.toString(uploadedEvents.get(4).getBody()));
}
Also used: StreamManager(co.cask.cdap.test.StreamManager) FileWriter(java.io.FileWriter) StreamEvent(co.cask.cdap.api.flow.flowlet.StreamEvent) File(java.io.File) BufferedWriter(java.io.BufferedWriter) Test(org.junit.Test)
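
For contrast, a minimal sketch of the per-event path; StreamManager#send(String) is assumed to exist alongside the send(File, String) overload exercised above, and the stream name is hypothetical.

@Test
public void testSingleEventUpload() throws Exception {
    StreamManager stream = getStreamManager("singleStream");
    stream.createStream();
    // one HTTP call per event, versus one multipart upload for the whole file
    stream.send("this,is,some");
    stream.send("test,csv,data");
    List<StreamEvent> events = stream.getEvents(0, System.currentTimeMillis(), 100);
    Assert.assertEquals(2, events.size());
}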

Example 38 with StreamEvent

Use of co.cask.cdap.api.flow.flowlet.StreamEvent in project cdap by caskdata.

From the class FlowTest, method testFlow:

@Test
public void testFlow() throws Exception {
    final ApplicationWithPrograms app = AppFabricTestHelper.deployApplicationWithManager(WordCountApp.class, TEMP_FOLDER_SUPPLIER);
    List<ProgramController> controllers = Lists.newArrayList();
    for (ProgramDescriptor programDescriptor : app.getPrograms()) {
        // running mapreduce is out of scope for this test (there's a separate unit test for that)
        if (programDescriptor.getProgramId().getType() == ProgramType.MAPREDUCE) {
            continue;
        }
        controllers.add(AppFabricTestHelper.submit(app, programDescriptor.getSpecification().getClassName(), new BasicArguments(), TEMP_FOLDER_SUPPLIER));
    }
    TimeUnit.SECONDS.sleep(1);
    TransactionSystemClient txSystemClient = AppFabricTestHelper.getInjector().getInstance(TransactionSystemClient.class);
    QueueName queueName = QueueName.fromStream(app.getApplicationId().getNamespace(), "text");
    QueueClientFactory queueClientFactory = AppFabricTestHelper.getInjector().getInstance(QueueClientFactory.class);
    QueueProducer producer = queueClientFactory.createProducer(queueName);
    // start a transaction so the queue writes happen transactionally
    Transaction tx = txSystemClient.startShort();
    ((TransactionAware) producer).startTx(tx);
    StreamEventCodec codec = new StreamEventCodec();
    for (int i = 0; i < 10; i++) {
        String msg = "Testing message " + i;
        StreamEvent event = new StreamEvent(ImmutableMap.<String, String>of(), ByteBuffer.wrap(msg.getBytes(Charsets.UTF_8)));
        producer.enqueue(new QueueEntry(codec.encodePayload(event)));
    }
    // commit tx
    ((TransactionAware) producer).commitTx();
    txSystemClient.commitOrThrow(tx);
    // Query the service for at most 10 seconds for the expected result
    Gson gson = new Gson();
    DiscoveryServiceClient discoveryServiceClient = AppFabricTestHelper.getInjector().getInstance(DiscoveryServiceClient.class);
    ServiceDiscovered serviceDiscovered = discoveryServiceClient.discover(String.format("service.%s.%s.%s", DefaultId.NAMESPACE.getNamespace(), "WordCountApp", "WordFrequencyService"));
    EndpointStrategy endpointStrategy = new RandomEndpointStrategy(serviceDiscovered);
    int trials = 0;
    while (trials++ < 10) {
        Discoverable discoverable = endpointStrategy.pick(2, TimeUnit.SECONDS);
        URL url = new URL(String.format("http://%s:%d/v3/namespaces/default/apps/%s/services/%s/methods/%s/%s", discoverable.getSocketAddress().getHostName(), discoverable.getSocketAddress().getPort(), "WordCountApp", "WordFrequencyService", "wordfreq", "text:Testing"));
        try {
            HttpURLConnection urlConn = (HttpURLConnection) url.openConnection();
            Map<String, Long> responseContent = gson.fromJson(new InputStreamReader(urlConn.getInputStream(), Charsets.UTF_8), new TypeToken<Map<String, Long>>() {
            }.getType());
            LOG.info("Service response: " + responseContent);
            if (ImmutableMap.of("text:Testing", 10L).equals(responseContent)) {
                break;
            }
        } catch (Throwable t) {
            LOG.info("Exception when trying to query service.", t);
        }
        TimeUnit.SECONDS.sleep(1);
    }
    Assert.assertTrue(trials < 10);
    for (ProgramController controller : controllers) {
        controller.stop().get();
    }
}
Also used: DiscoveryServiceClient(org.apache.twill.discovery.DiscoveryServiceClient) Gson(com.google.gson.Gson) URL(java.net.URL) TransactionSystemClient(org.apache.tephra.TransactionSystemClient) StreamEventCodec(co.cask.cdap.common.stream.StreamEventCodec) HttpURLConnection(java.net.HttpURLConnection) QueueProducer(co.cask.cdap.data2.queue.QueueProducer) EndpointStrategy(co.cask.cdap.common.discovery.EndpointStrategy) RandomEndpointStrategy(co.cask.cdap.common.discovery.RandomEndpointStrategy) ApplicationWithPrograms(co.cask.cdap.internal.app.deploy.pipeline.ApplicationWithPrograms) ProgramDescriptor(co.cask.cdap.app.program.ProgramDescriptor) BasicArguments(co.cask.cdap.internal.app.runtime.BasicArguments) QueueName(co.cask.cdap.common.queue.QueueName) ProgramController(co.cask.cdap.app.runtime.ProgramController) Discoverable(org.apache.twill.discovery.Discoverable) InputStreamReader(java.io.InputStreamReader) StreamEvent(co.cask.cdap.api.flow.flowlet.StreamEvent) ServiceDiscovered(org.apache.twill.discovery.ServiceDiscovered) QueueEntry(co.cask.cdap.data2.queue.QueueEntry) Transaction(org.apache.tephra.Transaction) TransactionAware(org.apache.tephra.TransactionAware) TypeToken(com.google.common.reflect.TypeToken) QueueClientFactory(co.cask.cdap.data2.queue.QueueClientFactory) Test(org.junit.Test)
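
The codec's encode side is shown above; a minimal round-trip sketch follows, assuming StreamEventCodec also exposes a decodePayload counterpart to encodePayload.

import java.nio.ByteBuffer;
import com.google.common.base.Charsets;
import com.google.common.collect.ImmutableMap;
import co.cask.cdap.api.flow.flowlet.StreamEvent;
import co.cask.cdap.common.stream.StreamEventCodec;

public final class CodecRoundTrip {
    public static void main(String[] args) {
        StreamEventCodec codec = new StreamEventCodec();
        StreamEvent event = new StreamEvent(ImmutableMap.<String, String>of(),
                                            ByteBuffer.wrap("Testing message 0".getBytes(Charsets.UTF_8)));
        byte[] encoded = codec.encodePayload(event);
        // decodePayload is assumed to be the inverse of encodePayload
        StreamEvent decoded = codec.decodePayload(encoded);
        // prints "Testing message 0"
        System.out.println(Charsets.UTF_8.decode(decoded.getBody()).toString());
    }
}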

Example 39 with StreamEvent

Use of co.cask.cdap.api.flow.flowlet.StreamEvent in project cdap by caskdata.

From the class OpenCloseDataSetTest, method testDataSetsAreClosed:

@Test(timeout = 120000)
public void testDataSetsAreClosed() throws Exception {
    final String tableName = "foo";
    TrackingTable.resetTracker();
    ApplicationWithPrograms app = AppFabricTestHelper.deployApplicationWithManager(DummyAppWithTrackingTable.class, TEMP_FOLDER_SUPPLIER);
    List<ProgramController> controllers = Lists.newArrayList();
    // start the programs
    for (ProgramDescriptor programDescriptor : app.getPrograms()) {
        if (programDescriptor.getProgramId().getType().equals(ProgramType.MAPREDUCE)) {
            continue;
        }
        controllers.add(AppFabricTestHelper.submit(app, programDescriptor.getSpecification().getClassName(), new BasicArguments(), TEMP_FOLDER_SUPPLIER));
    }
    // write some data to the queue
    TransactionSystemClient txSystemClient = AppFabricTestHelper.getInjector().getInstance(TransactionSystemClient.class);
    QueueName queueName = QueueName.fromStream(app.getApplicationId().getNamespace(), "xx");
    QueueClientFactory queueClientFactory = AppFabricTestHelper.getInjector().getInstance(QueueClientFactory.class);
    QueueProducer producer = queueClientFactory.createProducer(queueName);
    // start a transaction so the queue writes happen transactionally
    Transaction tx = txSystemClient.startShort();
    ((TransactionAware) producer).startTx(tx);
    StreamEventCodec codec = new StreamEventCodec();
    for (int i = 0; i < 4; i++) {
        String msg = "x" + i;
        StreamEvent event = new StreamEvent(ImmutableMap.<String, String>of(), ByteBuffer.wrap(msg.getBytes(Charsets.UTF_8)));
        producer.enqueue(new QueueEntry(codec.encodePayload(event)));
    }
    // commit tx
    ((TransactionAware) producer).commitTx();
    txSystemClient.commitOrThrow(tx);
    while (TrackingTable.getTracker(tableName, "write") < 4) {
        TimeUnit.MILLISECONDS.sleep(50);
    }
    // get the number of writes to the foo table
    Assert.assertEquals(4, TrackingTable.getTracker(tableName, "write"));
    // only 2 "open" calls should be tracked:
    // 1. the flow has started with a single flowlet (the service is loaded lazily on the first request)
    // 2. DatasetSystemMetadataWriter also instantiates the dataset because it needs to add some
    //    system tags for the dataset
    Assert.assertEquals(2, TrackingTable.getTracker(tableName, "open"));
    // now send a request to the service
    Gson gson = new Gson();
    DiscoveryServiceClient discoveryServiceClient = AppFabricTestHelper.getInjector().getInstance(DiscoveryServiceClient.class);
    Discoverable discoverable = new RandomEndpointStrategy(discoveryServiceClient.discover(String.format("service.%s.%s.%s", DefaultId.NAMESPACE.getEntityName(), "dummy", "DummyService"))).pick(5, TimeUnit.SECONDS);
    Assert.assertNotNull(discoverable);
    HttpClient client = new DefaultHttpClient();
    HttpGet get = new HttpGet(String.format("http://%s:%d/v3/namespaces/default/apps/%s/services/%s/methods/%s", discoverable.getSocketAddress().getHostName(), discoverable.getSocketAddress().getPort(), "dummy", "DummyService", "x1"));
    HttpResponse response = client.execute(get);
    String responseContent = gson.fromJson(new InputStreamReader(response.getEntity().getContent(), Charsets.UTF_8), String.class);
    client.getConnectionManager().shutdown();
    Assert.assertEquals("x1", responseContent);
    // now the dataset must have a read and another open operation
    Assert.assertEquals(1, TrackingTable.getTracker(tableName, "read"));
    Assert.assertEquals(3, TrackingTable.getTracker(tableName, "open"));
    // The dataset that was instantiated by the DatasetSystemMetadataWriter should have been closed
    Assert.assertEquals(1, TrackingTable.getTracker(tableName, "close"));
    // stop all programs; each should close the dataset foo
    for (ProgramController controller : controllers) {
        controller.stop().get();
    }
    int timesOpened = TrackingTable.getTracker(tableName, "open");
    Assert.assertTrue(timesOpened >= 2);
    Assert.assertEquals(timesOpened, TrackingTable.getTracker(tableName, "close"));
    // now start the m/r job
    ProgramController controller = null;
    for (ProgramDescriptor programDescriptor : app.getPrograms()) {
        if (programDescriptor.getProgramId().getType().equals(ProgramType.MAPREDUCE)) {
            controller = AppFabricTestHelper.submit(app, programDescriptor.getSpecification().getClassName(), new BasicArguments(), TEMP_FOLDER_SUPPLIER);
        }
    }
    Assert.assertNotNull(controller);
    while (!controller.getState().equals(ProgramController.State.COMPLETED)) {
        TimeUnit.MILLISECONDS.sleep(100);
    }
    // The M/R job is done; one mapper and the M/R client should have opened and closed the dataset foo.
    // We don't know the exact number of times it was opened, but it is at least once, and it must be
    // closed the same number of times.
    Assert.assertTrue(timesOpened < TrackingTable.getTracker(tableName, "open"));
    Assert.assertEquals(TrackingTable.getTracker(tableName, "open"), TrackingTable.getTracker(tableName, "close"));
    Assert.assertTrue(0 < TrackingTable.getTracker("bar", "open"));
    Assert.assertEquals(TrackingTable.getTracker("bar", "open"), TrackingTable.getTracker("bar", "close"));
}
Also used: DiscoveryServiceClient(org.apache.twill.discovery.DiscoveryServiceClient) HttpGet(org.apache.http.client.methods.HttpGet) Gson(com.google.gson.Gson) DefaultHttpClient(org.apache.http.impl.client.DefaultHttpClient) TransactionSystemClient(org.apache.tephra.TransactionSystemClient) StreamEventCodec(co.cask.cdap.common.stream.StreamEventCodec) QueueProducer(co.cask.cdap.data2.queue.QueueProducer) ApplicationWithPrograms(co.cask.cdap.internal.app.deploy.pipeline.ApplicationWithPrograms) ProgramDescriptor(co.cask.cdap.app.program.ProgramDescriptor) BasicArguments(co.cask.cdap.internal.app.runtime.BasicArguments) QueueName(co.cask.cdap.common.queue.QueueName) ProgramController(co.cask.cdap.app.runtime.ProgramController) Discoverable(org.apache.twill.discovery.Discoverable) InputStreamReader(java.io.InputStreamReader) StreamEvent(co.cask.cdap.api.flow.flowlet.StreamEvent) HttpResponse(org.apache.http.HttpResponse) QueueEntry(co.cask.cdap.data2.queue.QueueEntry) Transaction(org.apache.tephra.Transaction) TransactionAware(org.apache.tephra.TransactionAware) HttpClient(org.apache.http.client.HttpClient) QueueClientFactory(co.cask.cdap.data2.queue.QueueClientFactory) RandomEndpointStrategy(co.cask.cdap.common.discovery.RandomEndpointStrategy) Test(org.junit.Test)
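
The test above polls TrackingTable counters in ad-hoc sleep loops; a hypothetical helper capturing that poll-until-deadline pattern could look like the following (BooleanSupplier assumes Java 8).

import java.util.concurrent.TimeUnit;
import java.util.function.BooleanSupplier;

final class PollUtil {

    // Re-checks the condition every 50 ms until it holds or the deadline passes.
    static boolean awaitCondition(BooleanSupplier condition, long timeoutMillis) throws InterruptedException {
        long deadline = System.currentTimeMillis() + timeoutMillis;
        while (System.currentTimeMillis() < deadline) {
            if (condition.getAsBoolean()) {
                return true;
            }
            TimeUnit.MILLISECONDS.sleep(50);
        }
        return condition.getAsBoolean();
    }
}

Usage mirroring the write-tracking loop in the test: awaitCondition(() -> TrackingTable.getTracker("foo", "write") >= 4, 10000).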

Example 40 with StreamEvent

Use of co.cask.cdap.api.flow.flowlet.StreamEvent in project cdap by caskdata.

From the class StreamBatchSource, method transform:

@Override
public void transform(KeyValue<LongWritable, Object> input, Emitter<StructuredRecord> emitter) throws Exception {
    // if no format spec was given, the value is a StreamEvent
    if (Strings.isNullOrEmpty(streamBatchConfig.format)) {
        StreamEvent event = (StreamEvent) input.getValue();
        Map<String, String> headers = Objects.firstNonNull(event.getHeaders(), ImmutableMap.<String, String>of());
        StructuredRecord output = StructuredRecord.builder(DEFAULT_SCHEMA).set("ts", input.getKey().get()).set("headers", headers).set("body", event.getBody()).build();
        emitter.emit(output);
    } else {
        // otherwise, it will be a GenericStreamEventData
        @SuppressWarnings("unchecked") GenericStreamEventData<StructuredRecord> event = (GenericStreamEventData<StructuredRecord>) input.getValue();
        StructuredRecord record = event.getBody();
        Schema inputSchema = record.getSchema();
        Schema outputSchema = schemaCache.get(inputSchema);
        // if we haven't seen this schema before, generate the output schema (add ts and header fields)
        if (outputSchema == null) {
            List<Schema.Field> fields = Lists.newArrayList();
            fields.add(DEFAULT_SCHEMA.getField("ts"));
            fields.add(DEFAULT_SCHEMA.getField("headers"));
            fields.addAll(inputSchema.getFields());
            outputSchema = Schema.recordOf(inputSchema.getRecordName(), fields);
            schemaCache.put(inputSchema, outputSchema);
        }
        // easier to just deal with an empty map than deal with nullables, so the headers field is non-nullable.
        Map<String, String> headers = Objects.firstNonNull(event.getHeaders(), ImmutableMap.<String, String>of());
        StructuredRecord.Builder builder = StructuredRecord.builder(outputSchema);
        builder.set("ts", input.getKey().get());
        builder.set("headers", headers);
        for (Schema.Field field : inputSchema.getFields()) {
            String fieldName = field.getName();
            builder.set(fieldName, record.get(fieldName));
        }
        emitter.emit(builder.build());
    }
}
Also used: StreamEvent(co.cask.cdap.api.flow.flowlet.StreamEvent) Schema(co.cask.cdap.api.data.schema.Schema) GenericStreamEventData(co.cask.cdap.api.stream.GenericStreamEventData) StructuredRecord(co.cask.cdap.api.data.format.StructuredRecord)
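
The schema-widening branch is the core of transform(); extracted as a standalone helper, and assuming DEFAULT_SCHEMA defines the "ts" and "headers" fields exactly as in the snippet, it reads:

import java.util.List;
import com.google.common.collect.Lists;
import co.cask.cdap.api.data.schema.Schema;

final class SchemaWidening {

    // Prepends the ts and headers fields from the default schema to the input record's fields.
    static Schema addTimestampAndHeaders(Schema defaultSchema, Schema inputSchema) {
        List<Schema.Field> fields = Lists.newArrayList();
        fields.add(defaultSchema.getField("ts"));
        fields.add(defaultSchema.getField("headers"));
        fields.addAll(inputSchema.getFields());
        return Schema.recordOf(inputSchema.getRecordName(), fields);
    }
}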

Aggregations

StreamEvent (co.cask.cdap.api.flow.flowlet.StreamEvent): 84 uses
Test (org.junit.Test): 65 uses
Location (org.apache.twill.filesystem.Location): 27 uses
StreamId (co.cask.cdap.proto.id.StreamId): 24 uses
StructuredRecord (co.cask.cdap.api.data.format.StructuredRecord): 19 uses
FormatSpecification (co.cask.cdap.api.data.format.FormatSpecification): 17 uses
Schema (co.cask.cdap.api.data.schema.Schema): 10 uses
IOException (java.io.IOException): 9 uses
StreamConfig (co.cask.cdap.data2.transaction.stream.StreamConfig): 8 uses
ByteBuffer (java.nio.ByteBuffer): 8 uses
ConsumerConfig (co.cask.cdap.data2.queue.ConsumerConfig): 7 uses
StreamAdmin (co.cask.cdap.data2.transaction.stream.StreamAdmin): 6 uses
TransactionContext (org.apache.tephra.TransactionContext): 6 uses
BinaryDecoder (co.cask.cdap.common.io.BinaryDecoder): 5 uses
TypeToken (com.google.common.reflect.TypeToken): 5 uses
StreamEventCodec (co.cask.cdap.common.stream.StreamEventCodec): 4 uses
IdentityStreamEventDecoder (co.cask.cdap.data.stream.decoder.IdentityStreamEventDecoder): 4 uses
File (java.io.File): 4 uses
SchemaHash (co.cask.cdap.api.data.schema.SchemaHash): 3 uses
QueueName (co.cask.cdap.common.queue.QueueName): 3 uses