Example 1 with GenericRow

Use of io.confluent.ksql.GenericRow in project ksql by confluentinc.

From the class EndToEndIntegrationTest, the method shouldSelectAllFromDerivedStream:

@Test
public void shouldSelectAllFromDerivedStream() throws Exception {
    executeStatement("CREATE STREAM pageviews_female" + " AS SELECT %s.userid AS userid, pageid, regionid, gender " + " FROM %s " + " LEFT JOIN %s ON %s.userid = %s.userid" + " WHERE gender = 'FEMALE';", userTable, pageViewStream, userTable, pageViewStream, userTable);
    final QueuedQueryMetadata queryMetadata = executeQuery("SELECT * from pageviews_female;");
    List<KeyValue<String, GenericRow>> results = new ArrayList<>();
    BlockingQueue<KeyValue<String, GenericRow>> rowQueue = queryMetadata.getRowQueue();
    // From the mock data, we expect exactly 3 page views from female users.
    List<String> expectedPages = Arrays.asList("PAGE_2", "PAGE_5", "PAGE_5");
    List<String> expectedUsers = Arrays.asList("USER_2", "USER_0", "USER_2");
    List<String> actualPages = new ArrayList<>();
    List<String> actualUsers = new ArrayList<>();
    TestUtils.waitForCondition(() -> {
        try {
            log.debug("polling from pageviews_female");
            KeyValue<String, GenericRow> nextRow = rowQueue.poll(8000, TimeUnit.MILLISECONDS);
            if (nextRow != null) {
                results.add(nextRow);
            } else {
                // If we didn't receive any records on the output topic for 8 seconds, the join
                // probably failed because the table data wasn't yet populated when the stream data
                // was consumed. Repopulate the stream data so the join can be retried.
                log.warn("repopulating data in {} because the join returned empty results.", pageViewTopic);
                testHarness.publishTestData(pageViewTopic, pageViewDataProvider, System.currentTimeMillis());
            }
        } catch (Exception e) {
            log.error("Got exception when polling from pageviews_female", e);
        }
        return 3 <= results.size();
    }, 30000, "Could not consume any records from " + pageViewTopic + " for 30 seconds");
    for (KeyValue<String, GenericRow> result : results) {
        List<Object> columns = result.value.getColumns();
        log.debug("pageview join: {}", columns);
        assertEquals(6, columns.size());
        String user = (String) columns.get(2);
        actualUsers.add(user);
        String page = (String) columns.get(3);
        actualPages.add(page);
    }
    assertEquals(expectedPages, actualPages);
    assertEquals(expectedUsers, actualUsers);
}
Also used: GenericRow (io.confluent.ksql.GenericRow), KeyValue (org.apache.kafka.streams.KeyValue), QueuedQueryMetadata (io.confluent.ksql.util.QueuedQueryMetadata), ArrayList (java.util.ArrayList), IntegrationTest (org.apache.kafka.test.IntegrationTest), Test (org.junit.Test)
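
Each joined row carries KSQL's implicit ROWTIME and ROWKEY columns ahead of the four selected fields, which is why the test expects six columns and reads userid at index 2 and pageid at index 3. Below is a minimal sketch of that indexing against the GenericRow API used above; the column values are invented for illustration.

import io.confluent.ksql.GenericRow;

import java.util.Arrays;
import java.util.List;

public class GenericRowIndexingSketch {
    public static void main(String[] args) {
        // Index 0 = implicit ROWTIME, index 1 = implicit ROWKEY, then the
        // selected columns: userid, pageid, regionid, gender (values invented).
        GenericRow row = new GenericRow(Arrays.asList(
            1500962514806L, "USER_2", "USER_2", "PAGE_2", "REGION_1", "FEMALE"));
        List<Object> columns = row.getColumns();
        String user = (String) columns.get(2);  // userid
        String page = (String) columns.get(3);  // pageid
        System.out.println(user + " viewed " + page);
    }
}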

Example 2 with GenericRow

Use of io.confluent.ksql.GenericRow in project ksql by confluentinc.

From the class IntegrationTestHarness, the method consumeData:

public <K> Map<K, GenericRow> consumeData(String topic, Schema schema, int expectedNumMessages, Deserializer<K> keyDeserializer, long resultsPollMaxTimeMs, DataSource.DataSourceSerDe dataSourceSerDe) {
    topic = topic.toUpperCase();
    Map<K, GenericRow> result = new HashMap<>();
    Properties consumerConfig = new Properties();
    consumerConfig.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, ksqlConfig.get(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG));
    consumerConfig.put(ConsumerConfig.GROUP_ID_CONFIG, CONSUMER_GROUP_ID_PREFIX + System.currentTimeMillis());
    consumerConfig.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest");
    try (KafkaConsumer<K, GenericRow> consumer = new KafkaConsumer<>(consumerConfig, keyDeserializer, getDeserializer(schema, dataSourceSerDe))) {
        consumer.subscribe(Collections.singleton(topic));
        long pollStart = System.currentTimeMillis();
        long pollEnd = pollStart + resultsPollMaxTimeMs;
        while (System.currentTimeMillis() < pollEnd && continueConsuming(result.size(), expectedNumMessages)) {
            for (ConsumerRecord<K, GenericRow> record : consumer.poll(Math.max(1, pollEnd - System.currentTimeMillis()))) {
                if (record.value() != null) {
                    result.put(record.key(), record.value());
                }
            }
        }
        for (ConsumerRecord<K, GenericRow> record : consumer.poll(RESULTS_EXTRA_POLL_TIME_MS)) {
            if (record.value() != null) {
                result.put(record.key(), record.value());
            }
        }
    }
    return result;
}
Also used: GenericRow (io.confluent.ksql.GenericRow), HashMap (java.util.HashMap), KafkaConsumer (org.apache.kafka.clients.consumer.KafkaConsumer), Properties (java.util.Properties)
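
A sketch of how a test might call consumeData; the harness instance, schema, topic name, and expected count below are illustrative assumptions, not taken from the source.

// Illustrative usage only: testHarness, ordersSchema and the topic name are assumed.
Map<String, GenericRow> rows = testHarness.consumeData(
    "orders",                          // consumeData upper-cases the topic name itself
    ordersSchema,                      // org.apache.kafka.connect.data.Schema of the values
    5,                                 // stop polling early once 5 distinct keys are seen
    new StringDeserializer(),          // key deserializer
    10000,                             // poll for at most 10 seconds overall
    DataSource.DataSourceSerDe.JSON);  // value format, assuming a JSON-serialized topic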

Example 3 with GenericRow

Use of io.confluent.ksql.GenericRow in project ksql by confluentinc.

From the class IntegrationTestHarness, the method produceData:

/**
 * Produces the given records to {@code topicName}, creating the topic automatically
 * if it doesn't already exist.
 *
 * @param topicName        the topic to produce to
 * @param recordsToPublish the records to publish, keyed by record key
 * @param serializer       the serializer for the {@link GenericRow} values
 * @param timestamp        the timestamp to set on every published record
 * @return the metadata of each acknowledged record, keyed by record key
 * @throws InterruptedException if interrupted while awaiting an acknowledgement
 * @throws TimeoutException     if an acknowledgement does not arrive in time
 * @throws ExecutionException   if a send fails
 */
public Map<String, RecordMetadata> produceData(String topicName, Map<String, GenericRow> recordsToPublish, Serializer<GenericRow> serializer, Long timestamp) throws InterruptedException, TimeoutException, ExecutionException {
    createTopic(topicName);
    Properties producerConfig = properties();
    KafkaProducer<String, GenericRow> producer = new KafkaProducer<>(producerConfig, new StringSerializer(), serializer);
    Map<String, RecordMetadata> result = new HashMap<>();
    for (Map.Entry<String, GenericRow> recordEntry : recordsToPublish.entrySet()) {
        String key = recordEntry.getKey();
        Future<RecordMetadata> recordMetadataFuture = producer.send(buildRecord(topicName, timestamp, recordEntry, key));
        result.put(key, recordMetadataFuture.get(TEST_RECORD_FUTURE_TIMEOUT_MS, TimeUnit.MILLISECONDS));
    }
    producer.close();
    return result;
}
Also used: GenericRow (io.confluent.ksql.GenericRow), KafkaProducer (org.apache.kafka.clients.producer.KafkaProducer), RecordMetadata (org.apache.kafka.clients.producer.RecordMetadata), HashMap (java.util.HashMap), Properties (java.util.Properties), StringSerializer (org.apache.kafka.common.serialization.StringSerializer), Map (java.util.Map)
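
A sketch of a typical call, reusing the data-provider pattern from Example 5; rowSerializer stands in for whatever Serializer<GenericRow> matches the topic's format and is an assumption here.

// Illustrative usage only: orderDataProvider.data() supplies Map<String, GenericRow>
// (see Example 5), and rowSerializer is an assumed Serializer<GenericRow>.
Map<String, RecordMetadata> acks = testHarness.produceData(
    "orders", orderDataProvider.data(), rowSerializer, System.currentTimeMillis());
System.out.println("acknowledged " + acks.size() + " records");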

Example 4 with GenericRow

Use of io.confluent.ksql.GenericRow in project ksql by confluentinc.

From the class JsonFormatTest, the method testSelectDateTimeUDFs (note that the @Test annotation is commented out, so this test is currently disabled):

// @Test
public void testSelectDateTimeUDFs() throws Exception {
    final String streamName = "SelectDateTimeUDFsStream".toUpperCase();
    final String selectColumns = "(ORDERTIME+1500962514806) , TIMESTAMPTOSTRING(ORDERTIME+1500962514806, " + "'yyyy-MM-dd HH:mm:ss.SSS'), " + "STRINGTOTIMESTAMP" + "(TIMESTAMPTOSTRING" + "(ORDERTIME+1500962514806, 'yyyy-MM-dd HH:mm:ss.SSS'), 'yyyy-MM-dd HH:mm:ss.SSS')";
    final String whereClause = "ORDERUNITS > 20 AND ITEMID LIKE '%_8'";
    final String queryString = String.format("CREATE STREAM %s AS SELECT %s FROM %s WHERE %s;", streamName, selectColumns, inputStream, whereClause);
    executePersistentQuery(queryString);
    Schema resultSchema = SchemaUtil.removeImplicitRowTimeRowKeyFromSchema(metaStore.getSource(streamName).getSchema());
    Map<String, GenericRow> expectedResults = new HashMap<>();
    expectedResults.put("8", new GenericRow(Arrays.asList(1500962514814L, "2017-07-24 23:01:54.814", 1500962514814L)));
    Map<String, GenericRow> results = readNormalResults(streamName, resultSchema, expectedResults.size());
    assertThat(results, equalTo(expectedResults));
}
Also used: GenericRow (io.confluent.ksql.GenericRow), HashMap (java.util.HashMap), Schema (org.apache.kafka.connect.data.Schema)
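
The expected row only works out if TIMESTAMPTOSTRING and STRINGTOTIMESTAMP are inverses for the same format string, so the third column reproduces the first. A plain-Java sketch of that round trip, assuming the UDFs follow SimpleDateFormat semantics in the JVM's default time zone:

import java.text.SimpleDateFormat;
import java.util.Date;

public class DateTimeRoundTripSketch {
    public static void main(String[] args) throws Exception {
        long orderTime = 8L;  // illustrative ORDERTIME value
        long millis = orderTime + 1500962514806L;
        SimpleDateFormat format = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss.SSS");
        String asString = format.format(new Date(millis));     // like TIMESTAMPTOSTRING
        long roundTripped = format.parse(asString).getTime();  // like STRINGTOTIMESTAMP
        // The pattern keeps millisecond precision, so the original epoch comes back
        // (barring DST-ambiguous local times).
        System.out.println(millis == roundTripped);
    }
}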

Example 5 with GenericRow

Use of io.confluent.ksql.GenericRow in project ksql by confluentinc.

From the class JsonFormatTest, the method produceInitData:

private void produceInitData() throws Exception {
    OrderDataProvider orderDataProvider = new OrderDataProvider();
    topicProducer.produceInputData(inputTopic, orderDataProvider.data(), orderDataProvider.schema());
    Schema messageSchema = SchemaBuilder.struct().field("MESSAGE", SchemaBuilder.STRING_SCHEMA).build();
    // A realistic nested-JSON log payload carried as a single STRING column.
    GenericRow messageRow = new GenericRow(Collections.singletonList(
        "{\"log\":{\"@timestamp\":\"2017-05-30T16:44:22.175Z\",\"@version\":\"1\","
            + "\"caasVersion\":\"0.0.2\",\"cloud\":\"aws\",\"logs\":[{\"entry\":\"first\"}],"
            + "\"clusterId\":\"cp99\",\"clusterName\":\"kafka\",\"cpComponentId\":\"kafka\","
            + "\"host\":\"kafka-1-wwl0p\",\"k8sId\":\"k8s13\",\"k8sName\":\"perf\",\"level\":\"ERROR\","
            + "\"logger\":\"kafka.server.ReplicaFetcherThread\",\"message\":\"Found invalid messages"
            + " during fetch for partition [foo512,172] offset 0 error Record is corrupt"
            + " (stored crc = 1321230880, computed crc = 1139143803)\",\"networkId\":\"vpc-d8c7a9bf\","
            + "\"region\":\"us-west-2\",\"serverId\":\"1\",\"skuId\":\"sku5\",\"source\":\"kafka\","
            + "\"tenantId\":\"t47\",\"tenantName\":\"perf-test\",\"thread\":\"ReplicaFetcherThread-0-2\","
            + "\"zone\":\"us-west-2a\"},\"stream\":\"stdout\",\"time\":2017}"));
    Map<String, GenericRow> records = new HashMap<>();
    records.put("1", messageRow);
    topicProducer.produceInputData(messageLogTopic, records, messageSchema);
}
Also used: GenericRow (io.confluent.ksql.GenericRow), OrderDataProvider (io.confluent.ksql.util.OrderDataProvider), HashMap (java.util.HashMap), Schema (org.apache.kafka.connect.data.Schema)
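
The schema and the row are paired positionally: the GenericRow supplies one value for each field declared on the struct schema, in declaration order. A minimal sketch extending the single-field MESSAGE pattern above to two fields; the field names and values here are invented.

import io.confluent.ksql.GenericRow;
import org.apache.kafka.connect.data.Schema;
import org.apache.kafka.connect.data.SchemaBuilder;

import java.util.Arrays;

public class SchemaRowPairingSketch {
    public static void main(String[] args) {
        // Hypothetical two-field schema; the row's columns must line up with it.
        Schema schema = SchemaBuilder.struct()
            .field("MESSAGE", SchemaBuilder.STRING_SCHEMA)
            .field("LEVEL", SchemaBuilder.STRING_SCHEMA)
            .build();
        GenericRow row = new GenericRow(Arrays.asList("broker started", "INFO"));
        System.out.println(schema.fields() + " -> " + row.getColumns());
    }
}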

Aggregations

GenericRow (io.confluent.ksql.GenericRow): 65
Test (org.junit.Test): 38
HashMap (java.util.HashMap): 27
Schema (org.apache.kafka.connect.data.Schema): 19
List (java.util.List): 15
StringDeserializer (org.apache.kafka.common.serialization.StringDeserializer): 15
ArrayList (java.util.ArrayList): 11
MockSchemaRegistryClient (io.confluent.kafka.schemaregistry.client.MockSchemaRegistryClient): 9
IntegrationTest (io.confluent.common.utils.IntegrationTest): 8
SchemaRegistryClient (io.confluent.kafka.schemaregistry.client.SchemaRegistryClient): 8
ObjectMapper (com.fasterxml.jackson.databind.ObjectMapper): 5
KsqlTopicSerDe (io.confluent.ksql.serde.KsqlTopicSerDe): 5
KafkaTopicClient (io.confluent.ksql.util.KafkaTopicClient): 5
KafkaTopicClientImpl (io.confluent.ksql.util.KafkaTopicClientImpl): 5
KsqlConfig (io.confluent.ksql.util.KsqlConfig): 5
Map (java.util.Map): 5
GenericRecord (org.apache.avro.generic.GenericRecord): 4
Windowed (org.apache.kafka.streams.kstream.Windowed): 4
KafkaAvroDeserializer (io.confluent.kafka.serializers.KafkaAvroDeserializer): 3
DereferenceExpression (io.confluent.ksql.parser.tree.DereferenceExpression): 3