Search in sources :

Example 1 with KafkaPublisher

use of org.apache.twill.kafka.client.KafkaPublisher in project cdap by caskdata.

the class SpamClassifierTest method publishKafkaMessages.

/**
 * Publishes two sample messages (one spam, one ham) to {@code KAFKA_TOPIC}.
 *
 * <p>Each attempt adds both messages to the preparer, sends them, and waits up to 5 seconds for the
 * send to complete. The attempt counts as successful only when the send reports exactly two published
 * messages; attempts are repeated through {@code Tasks.waitFor} for up to 15 seconds.
 *
 * @throws InterruptedException propagated from {@code Tasks.waitFor}
 * @throws TimeoutException propagated from {@code Tasks.waitFor}
 * @throws ExecutionException propagated from {@code Tasks.waitFor}
 */
private void publishKafkaMessages() throws InterruptedException, TimeoutException, ExecutionException {
    final KafkaPublisher.Preparer messagePreparer =
        kafkaClient.getPublisher(KafkaPublisher.Ack.ALL_RECEIVED, Compression.NONE).prepare(KAFKA_TOPIC);

    // A single publish attempt; yields true only if both messages were reported as published.
    Callable<Boolean> publishAttempt = new Callable<Boolean>() {

        @Override
        public Boolean call() throws Exception {
            // spam sample, keyed "1"
            messagePreparer.add(
                Charsets.UTF_8.encode("1:REMINDER FROM O2: To get 2.50 pounds free call credit and details of great "
                                          + "offers pls reply 2 this text with your valid name, house no and postcode"),
                "1");
            // ham sample, keyed "2"
            messagePreparer.add(Charsets.UTF_8.encode("2:I will call you later"), "2");
            try {
                return messagePreparer.send().get(5, TimeUnit.SECONDS) == 2;
            } catch (Exception e) {
                // Best effort: log the failure and report this attempt as unsuccessful so it can be retried.
                LOG.error("Exception occurs when sending messages to Kafka", e);
                return false;
            }
        }
    };

    // Wait for messages to be successfully published to Kafka. Retry if publishing fails.
    Tasks.waitFor(true, publishAttempt, 15, TimeUnit.SECONDS, "Failed to publish correct number of messages to Kafka");
}
Also used : TimeoutException(java.util.concurrent.TimeoutException) IOException(java.io.IOException) ExecutionException(java.util.concurrent.ExecutionException) KafkaPublisher(org.apache.twill.kafka.client.KafkaPublisher)

Example 2 with KafkaPublisher

use of org.apache.twill.kafka.client.KafkaPublisher in project cdap by caskdata.

the class SparkStreamingTestRun method test.

/**
 * Runs the {@code KafkaSparkStreaming} program against Kafka messages published to "testtopic" and
 * checks the word counts written to the "TimeSeriesResult" dataset, both on a first run and after
 * the program is stopped and started again with the same checkpoint directory.
 *
 * @throws Exception if deployment, publishing, or any of the waits fails
 */
@Test
public void test() throws Exception {
    File checkpointDir = TEMP_FOLDER.newFolder();
    KafkaPublisher publisher = KAFKA_TESTER.getKafkaClient().getPublisher(KafkaPublisher.Ack.LEADER_RECEIVED, Compression.NONE);
    ApplicationManager appManager = deployApplication(TestSparkApp.class);
    Map<String, String> args = ImmutableMap.of("checkpoint.path", checkpointDir.getAbsolutePath(), "kafka.brokers", KAFKA_TESTER.getBrokerService().getBrokerList(), "kafka.topics", "testtopic", "result.dataset", "TimeSeriesResult");
    SparkManager manager = appManager.getSparkManager(KafkaSparkStreaming.class.getSimpleName());
    manager.start(args);
    // Send 100 messages over 5 seconds
    for (int i = 0; i < 100; i++) {
        publisher.prepare("testtopic").add(Charsets.UTF_8.encode("Message " + i), "1").send();
        TimeUnit.MILLISECONDS.sleep(50);
    }
    // Sum up everything from the TimeSeriesTable. The "Message" should have count 100, while each number (0-99) should
    // have count of 1
    final DataSetManager<TimeseriesTable> tsTableManager = getDataset("TimeSeriesResult");
    final TimeseriesTable tsTable = tsTableManager.get();
    waitForCount("Message", 100L, tsTableManager, tsTable);
    for (int i = 0; i < 100; i++) {
        waitForCount(Integer.toString(i), 1L, tsTableManager, tsTable);
    }
    manager.stop();
    manager.waitForRun(ProgramRunStatus.KILLED, 60, TimeUnit.SECONDS);
    // Send 100 more messages without pause
    for (int i = 100; i < 200; i++) {
        publisher.prepare("testtopic").add(Charsets.UTF_8.encode("Message " + i), "1").send();
    }
    // Start the streaming app again. It should resume from where it left off because of checkpoint
    manager.start(args);
    // Expects "Message" having count = 200. Waiting for 100 here would already be satisfied by the
    // first run's results and would not verify that the second batch was processed.
    waitForCount("Message", 200L, tsTableManager, tsTable);
    // Expects each number (0-199) have count of 1
    for (int i = 0; i < 200; i++) {
        waitForCount(Integer.toString(i), 1L, tsTableManager, tsTable);
    }
    manager.stop();
    manager.waitForRuns(ProgramRunStatus.KILLED, 2, 60, TimeUnit.SECONDS);
}

/**
 * Polls once per second, for up to one minute, until {@code getCounts(key, table)} equals
 * {@code expectedCount}. The dataset manager is flushed before every read.
 */
private void waitForCount(final String key, long expectedCount,
                          final DataSetManager<TimeseriesTable> tableManager,
                          final TimeseriesTable table) throws Exception {
    Tasks.waitFor(expectedCount, new Callable<Long>() {

        @Override
        public Long call() throws Exception {
            tableManager.flush();
            return getCounts(key, table);
        }
    }, 1, TimeUnit.MINUTES, 1, TimeUnit.SECONDS);
}
Also used : ApplicationManager(co.cask.cdap.test.ApplicationManager) SparkManager(co.cask.cdap.test.SparkManager) TimeseriesTable(co.cask.cdap.api.dataset.lib.TimeseriesTable) KafkaSparkStreaming(co.cask.cdap.spark.app.KafkaSparkStreaming) File(java.io.File) KafkaPublisher(org.apache.twill.kafka.client.KafkaPublisher) Test(org.junit.Test)

Example 3 with KafkaPublisher

use of org.apache.twill.kafka.client.KafkaPublisher in project cdap by caskdata.

the class MetricsProcessorServiceTest method testMetricsProcessor.

/**
 * End-to-end check of metrics processing: publishes metrics to an embedded Kafka server and through
 * the messaging service, runs a {@code KafkaMetricsProcessorService} and a
 * {@code MessagingMetricsProcessorService} (the latter is stopped and recreated midway), and then
 * verifies that every expected metric can be queried from the {@code MetricStore}.
 *
 * @throws Exception if any service fails to start or stop, or a wait/assertion fails
 */
@Test
public void testMetricsProcessor() throws Exception {
    injector.getInstance(TransactionManager.class).startAndWait();
    injector.getInstance(DatasetOpExecutor.class).startAndWait();
    injector.getInstance(DatasetService.class).startAndWait();
    zkServer = InMemoryZKServer.builder().build();
    zkServer.startAndWait();
    Properties kafkaConfig = generateKafkaConfig(tmpFolder1);
    EmbeddedKafkaServer kafkaServer = new EmbeddedKafkaServer(kafkaConfig);
    kafkaServer.startAndWait();
    ZKClientService zkClient = ZKClientService.Builder.of(zkServer.getConnectionStr()).build();
    zkClient.startAndWait();
    KafkaClientService kafkaClient = new ZKKafkaClientService(zkClient);
    kafkaClient.startAndWait();
    final MetricStore metricStore = injector.getInstance(MetricStore.class);
    Set<Integer> partitions = new HashSet<>();
    for (int i = 0; i < PARTITION_SIZE; i++) {
        partitions.add(i);
    }
    KafkaPublisher publisher = kafkaClient.getPublisher(KafkaPublisher.Ack.FIRE_AND_FORGET, Compression.SNAPPY);
    final KafkaPublisher.Preparer preparer = publisher.prepare(TOPIC_PREFIX);
    // Wait for metrics to be successfully published to Kafka. Retry if publishing fails.
    Tasks.waitFor(true, new Callable<Boolean>() {

        @Override
        public Boolean call() throws Exception {
            return publishKafkaMetrics(METRICS_CONTEXT, expected, preparer);
        }
    }, 15, TimeUnit.SECONDS, "Failed to publish correct number of metrics to Kafka");
    // Start KafkaMetricsProcessorService after metrics are published to Kafka
    KafkaMetricsProcessorService kafkaMetricsProcessorService = new KafkaMetricsProcessorService(kafkaClient, injector.getInstance(MetricDatasetFactory.class), new MetricsMessageCallbackFactory(injector.getInstance(SchemaGenerator.class), injector.getInstance(DatumReaderFactory.class), metricStore, 4), TOPIC_PREFIX, partitions, new NoopMetricsContext());
    kafkaMetricsProcessorService.startAndWait();
    // Intentionally set queue size to a small value, so that MessagingMetricsProcessorService
    // internally can persist metrics when more messages are to be fetched
    MessagingMetricsProcessorService messagingMetricsProcessorService = new MessagingMetricsProcessorService(injector.getInstance(MetricDatasetFactory.class), TOPIC_PREFIX, messagingService, injector.getInstance(SchemaGenerator.class), injector.getInstance(DatumReaderFactory.class), metricStore, 1000L, 5, partitions, new NoopMetricsContext(), 50, 0);
    messagingMetricsProcessorService.startAndWait();
    long startTime = TimeUnit.MILLISECONDS.toSeconds(System.currentTimeMillis());
    // Publish metrics with messaging service and record expected metrics
    for (int i = 10; i < 20; i++) {
        publishMessagingMetrics(i, startTime, METRICS_CONTEXT, expected, SYSTEM_METRIC_PREFIX, MetricType.COUNTER);
    }
    Thread.sleep(500);
    // Stop and restart messagingMetricsProcessorService
    messagingMetricsProcessorService.stopAndWait();
    // Intentionally set queue size to a large value, so that MessagingMetricsProcessorService
    // internally only persists metrics during terminating.
    messagingMetricsProcessorService = new MessagingMetricsProcessorService(injector.getInstance(MetricDatasetFactory.class), TOPIC_PREFIX, messagingService, injector.getInstance(SchemaGenerator.class), injector.getInstance(DatumReaderFactory.class), metricStore, 500L, 100, partitions, new NoopMetricsContext(), 50, 0);
    messagingMetricsProcessorService.startAndWait();
    // Publish metrics after MessagingMetricsProcessorService restarts and record expected metrics
    for (int i = 20; i < 30; i++) {
        publishMessagingMetrics(i, startTime, METRICS_CONTEXT, expected, SYSTEM_METRIC_PREFIX, MetricType.GAUGE);
    }
    final List<String> missingMetricNames = new ArrayList<>();
    // Wait until all expected metrics can be queried from the metric store. If not all expected metrics
    // are retrieved when timeout occurs, print out the missing metrics
    try {
        Tasks.waitFor(true, new Callable<Boolean>() {

            @Override
            public Boolean call() throws Exception {
                return canQueryAllMetrics(metricStore, METRICS_CONTEXT, expected, missingMetricNames);
            }
        }, 10000, TimeUnit.MILLISECONDS, "Failed to get all metrics");
    } catch (TimeoutException e) {
        Assert.fail(String.format("Metrics: [%s] cannot be found in the metrics store.", Joiner.on(", ").join(missingMetricNames)));
    }
    // Query metrics from the metricStore and compare them with the expected ones
    assertMetricsResult(metricStore, METRICS_CONTEXT, expected);
    // Query for the 5 counter metrics published with messaging between time 5 - 14
    Collection<MetricTimeSeries> queryResult = metricStore.query(new MetricDataQuery(5, 14, 1, Integer.MAX_VALUE, ImmutableMap.of(SYSTEM_METRIC_PREFIX + COUNTER_METRIC_NAME, AggregationFunction.SUM), METRICS_CONTEXT, ImmutableList.<String>of(), null));
    MetricTimeSeries timeSeries = Iterables.getOnlyElement(queryResult);
    Assert.assertEquals(5, timeSeries.getTimeValues().size());
    for (TimeValue timeValue : timeSeries.getTimeValues()) {
        Assert.assertEquals(1L, timeValue.getValue());
    }
    // Stop services and servers. The Kafka and ZooKeeper clients are locals created by this test,
    // so nothing else can stop them; each is shut down before the server it connects to.
    kafkaMetricsProcessorService.stopAndWait();
    messagingMetricsProcessorService.stopAndWait();
    kafkaClient.stopAndWait();
    kafkaServer.stopAndWait();
    zkClient.stopAndWait();
    zkServer.stopAndWait();
    // Delete all metrics
    metricStore.deleteAll();
}
Also used : MetricStore(co.cask.cdap.api.metrics.MetricStore) ZKKafkaClientService(org.apache.twill.internal.kafka.client.ZKKafkaClientService) DatumReaderFactory(co.cask.cdap.internal.io.DatumReaderFactory) ArrayList(java.util.ArrayList) MetricTimeSeries(co.cask.cdap.api.metrics.MetricTimeSeries) DatasetService(co.cask.cdap.data2.datafabric.dataset.service.DatasetService) Properties(java.util.Properties) NoopMetricsContext(co.cask.cdap.api.metrics.NoopMetricsContext) MetricDatasetFactory(co.cask.cdap.metrics.store.MetricDatasetFactory) EmbeddedKafkaServer(org.apache.twill.internal.kafka.EmbeddedKafkaServer) TimeValue(co.cask.cdap.api.dataset.lib.cube.TimeValue) HashSet(java.util.HashSet) KafkaPublisher(org.apache.twill.kafka.client.KafkaPublisher) TimeoutException(java.util.concurrent.TimeoutException) ZKKafkaClientService(org.apache.twill.internal.kafka.client.ZKKafkaClientService) KafkaClientService(org.apache.twill.kafka.client.KafkaClientService) SchemaGenerator(co.cask.cdap.internal.io.SchemaGenerator) DatasetOpExecutor(co.cask.cdap.data2.datafabric.dataset.service.executor.DatasetOpExecutor) TopicNotFoundException(co.cask.cdap.api.messaging.TopicNotFoundException) TimeoutException(java.util.concurrent.TimeoutException) IOException(java.io.IOException) ZKClientService(org.apache.twill.zookeeper.ZKClientService) TransactionManager(org.apache.tephra.TransactionManager) MetricDataQuery(co.cask.cdap.api.metrics.MetricDataQuery) Test(org.junit.Test)

Aggregations

KafkaPublisher (org.apache.twill.kafka.client.KafkaPublisher)3 IOException (java.io.IOException)2 TimeoutException (java.util.concurrent.TimeoutException)2 Test (org.junit.Test)2 TimeseriesTable (co.cask.cdap.api.dataset.lib.TimeseriesTable)1 TimeValue (co.cask.cdap.api.dataset.lib.cube.TimeValue)1 TopicNotFoundException (co.cask.cdap.api.messaging.TopicNotFoundException)1 MetricDataQuery (co.cask.cdap.api.metrics.MetricDataQuery)1 MetricStore (co.cask.cdap.api.metrics.MetricStore)1 MetricTimeSeries (co.cask.cdap.api.metrics.MetricTimeSeries)1 NoopMetricsContext (co.cask.cdap.api.metrics.NoopMetricsContext)1 DatasetService (co.cask.cdap.data2.datafabric.dataset.service.DatasetService)1 DatasetOpExecutor (co.cask.cdap.data2.datafabric.dataset.service.executor.DatasetOpExecutor)1 DatumReaderFactory (co.cask.cdap.internal.io.DatumReaderFactory)1 SchemaGenerator (co.cask.cdap.internal.io.SchemaGenerator)1 MetricDatasetFactory (co.cask.cdap.metrics.store.MetricDatasetFactory)1 KafkaSparkStreaming (co.cask.cdap.spark.app.KafkaSparkStreaming)1 ApplicationManager (co.cask.cdap.test.ApplicationManager)1 SparkManager (co.cask.cdap.test.SparkManager)1 File (java.io.File)1