use of io.confluent.ksql.GenericRow in project ksql by confluentinc.
the class EndToEndIntegrationTest method shouldSelectAllFromDerivedStream.
@Test
public void shouldSelectAllFromDerivedStream() throws Exception {
  executeStatement(
      "CREATE STREAM pageviews_female"
          + " AS SELECT %s.userid AS userid, pageid, regionid, gender "
          + " FROM %s "
          + " LEFT JOIN %s ON %s.userid = %s.userid"
          + " WHERE gender = 'FEMALE';",
      userTable, pageViewStream, userTable, pageViewStream, userTable);
  final QueuedQueryMetadata queryMetadata = executeQuery("SELECT * from pageviews_female;");
  List<KeyValue<String, GenericRow>> results = new ArrayList<>();
  BlockingQueue<KeyValue<String, GenericRow>> rowQueue = queryMetadata.getRowQueue();
  // From the mock data, we expect exactly 3 page views from female users.
  List<String> expectedPages = Arrays.asList("PAGE_2", "PAGE_5", "PAGE_5");
  List<String> expectedUsers = Arrays.asList("USER_2", "USER_0", "USER_2");
  List<String> actualPages = new ArrayList<>();
  List<String> actualUsers = new ArrayList<>();
  TestUtils.waitForCondition(() -> {
    try {
      log.debug("polling from pageviews_female");
      KeyValue<String, GenericRow> nextRow = rowQueue.poll(8000, TimeUnit.MILLISECONDS);
      if (nextRow != null) {
        results.add(nextRow);
      } else {
        // If we didn't receive any records on the output topic for 8 seconds, it probably means
        // the join failed because the table data wasn't yet populated when the stream data was
        // consumed. Repopulate the stream data to retry the join.
        log.warn("repopulating data in {} because the join returned empty results.", pageViewTopic);
        testHarness.publishTestData(pageViewTopic, pageViewDataProvider, System.currentTimeMillis());
      }
    } catch (Exception e) {
      log.error("Got exception when polling from pageviews_female", e);
    }
    return 3 <= results.size();
  }, 30000, "Could not consume any records from " + pageViewTopic + " for 30 seconds");
  for (KeyValue<String, GenericRow> result : results) {
    List<Object> columns = result.value.getColumns();
    log.debug("pageview join: {}", columns);
    assertEquals(6, columns.size());
    String user = (String) columns.get(2);
    actualUsers.add(user);
    String page = (String) columns.get(3);
    actualPages.add(page);
  }
  assertEquals(expectedPages, actualPages);
  assertEquals(expectedUsers, actualUsers);
}
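The column indices 2 and 3 read above rely on KSQL placing its two implicit columns (ROWTIME, ROWKEY) ahead of the four selected columns, which is also why exactly six columns are asserted per row. A minimal sketch of that assumed layout, using only the GenericRow constructor and getColumns() that appear in these examples (the sample values are made up):

import io.confluent.ksql.GenericRow;
import java.util.Arrays;
import java.util.List;

public class PageviewsFemaleRowLayout {
  public static void main(String[] args) {
    // Assumed column order for pageviews_female rows:
    // 0: ROWTIME, 1: ROWKEY, 2: USERID, 3: PAGEID, 4: REGIONID, 5: GENDER
    GenericRow row = new GenericRow(Arrays.asList(
        1502807216000L, "USER_2", "USER_2", "PAGE_2", "REGION_1", "FEMALE"));
    List<Object> columns = row.getColumns();
    System.out.println(columns.size());  // 6, matching the assertEquals above
    System.out.println(columns.get(2));  // USER_2 (userid)
    System.out.println(columns.get(3));  // PAGE_2 (pageid)
  }
}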
use of io.confluent.ksql.GenericRow in project ksql by confluentinc.
the class IntegrationTestHarness method consumeData.
public <K> Map<K, GenericRow> consumeData(
    String topic,
    Schema schema,
    int expectedNumMessages,
    Deserializer<K> keyDeserializer,
    long resultsPollMaxTimeMs,
    DataSource.DataSourceSerDe dataSourceSerDe) {
  topic = topic.toUpperCase();
  Map<K, GenericRow> result = new HashMap<>();
  Properties consumerConfig = new Properties();
  consumerConfig.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG,
      ksqlConfig.get(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG));
  consumerConfig.put(ConsumerConfig.GROUP_ID_CONFIG,
      CONSUMER_GROUP_ID_PREFIX + System.currentTimeMillis());
  consumerConfig.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest");
  try (KafkaConsumer<K, GenericRow> consumer =
      new KafkaConsumer<>(consumerConfig, keyDeserializer, getDeserializer(schema, dataSourceSerDe))) {
    consumer.subscribe(Collections.singleton(topic));
    long pollStart = System.currentTimeMillis();
    long pollEnd = pollStart + resultsPollMaxTimeMs;
    while (System.currentTimeMillis() < pollEnd
        && continueConsuming(result.size(), expectedNumMessages)) {
      for (ConsumerRecord<K, GenericRow> record
          : consumer.poll(Math.max(1, pollEnd - System.currentTimeMillis()))) {
        if (record.value() != null) {
          result.put(record.key(), record.value());
        }
      }
    }
    for (ConsumerRecord<K, GenericRow> record : consumer.poll(RESULTS_EXTRA_POLL_TIME_MS)) {
      if (record.value() != null) {
        result.put(record.key(), record.value());
      }
    }
  }
  return result;
}
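A hedged caller sketch written only against the signature above; the topic name, expected count, and the JSON serde constant are assumptions, and StringDeserializer is the standard Kafka key deserializer from org.apache.kafka.common.serialization:

// Hypothetical test-side helper: read rows from a derived stream's output topic
// as JSON, keyed by String, within a 30 second poll budget.
private Map<String, GenericRow> readDerivedStream(IntegrationTestHarness harness, Schema resultSchema) {
  return harness.consumeData(
      "PAGEVIEWS_FEMALE",                 // topic name (consumeData upper-cases it anyway)
      resultSchema,                       // value schema used to build the value deserializer
      3,                                  // stop early once this many distinct keys are seen
      new StringDeserializer(),           // key deserializer
      30000L,                             // overall poll budget in milliseconds
      DataSource.DataSourceSerDe.JSON);   // value format (assumed constant)
}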
use of io.confluent.ksql.GenericRow in project ksql by confluentinc.
the class IntegrationTestHarness method produceData.
/**
 * Publishes test data to the given topic. The topic will be created automatically
 * if it does not already exist.
 *
 * @param topicName        the topic to produce to
 * @param recordsToPublish the records to publish, keyed by record key
 * @param serializer       the serializer for the {@link GenericRow} values
 * @param timestamp        the record timestamp to use for every published record
 * @return the {@link RecordMetadata} returned by the broker, keyed by record key
 * @throws InterruptedException if waiting for a send to complete is interrupted
 * @throws TimeoutException     if a send does not complete within the future timeout
 * @throws ExecutionException   if a send fails
 */
public Map<String, RecordMetadata> produceData(
    String topicName,
    Map<String, GenericRow> recordsToPublish,
    Serializer<GenericRow> serializer,
    Long timestamp)
    throws InterruptedException, TimeoutException, ExecutionException {
  createTopic(topicName);
  Properties producerConfig = properties();
  KafkaProducer<String, GenericRow> producer =
      new KafkaProducer<>(producerConfig, new StringSerializer(), serializer);
  Map<String, RecordMetadata> result = new HashMap<>();
  for (Map.Entry<String, GenericRow> recordEntry : recordsToPublish.entrySet()) {
    String key = recordEntry.getKey();
    Future<RecordMetadata> recordMetadataFuture =
        producer.send(buildRecord(topicName, timestamp, recordEntry, key));
    result.put(key, recordMetadataFuture.get(TEST_RECORD_FUTURE_TIMEOUT_MS, TimeUnit.MILLISECONDS));
  }
  producer.close();
  return result;
}
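A usage sketch built only on the produceData signature above; the helper name is hypothetical, and the value serializer is supplied by the caller rather than named here, since these examples don't show which serializer the tests construct:

// Hypothetical helper: publish a provider's records with one fixed timestamp and
// report how many sends the broker acknowledged.
private int publishAll(IntegrationTestHarness harness,
                       String topic,
                       Map<String, GenericRow> recordsToPublish,
                       Serializer<GenericRow> valueSerializer) throws Exception {
  Map<String, RecordMetadata> acks =
      harness.produceData(topic, recordsToPublish, valueSerializer, System.currentTimeMillis());
  return acks.size();
}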
use of io.confluent.ksql.GenericRow in project ksql by confluentinc.
the class JsonFormatTest method testSelectDateTimeUDFs.
// @Test
public void testSelectDateTimeUDFs() throws Exception {
  final String streamName = "SelectDateTimeUDFsStream".toUpperCase();
  final String selectColumns =
      "(ORDERTIME+1500962514806) , "
          + "TIMESTAMPTOSTRING(ORDERTIME+1500962514806, 'yyyy-MM-dd HH:mm:ss.SSS'), "
          + "STRINGTOTIMESTAMP(TIMESTAMPTOSTRING(ORDERTIME+1500962514806, "
          + "'yyyy-MM-dd HH:mm:ss.SSS'), 'yyyy-MM-dd HH:mm:ss.SSS')";
  final String whereClause = "ORDERUNITS > 20 AND ITEMID LIKE '%_8'";
  final String queryString = String.format(
      "CREATE STREAM %s AS SELECT %s FROM %s WHERE %s;",
      streamName, selectColumns, inputStream, whereClause);
  executePersistentQuery(queryString);
  Schema resultSchema = SchemaUtil.removeImplicitRowTimeRowKeyFromSchema(
      metaStore.getSource(streamName).getSchema());
  Map<String, GenericRow> expectedResults = new HashMap<>();
  expectedResults.put("8",
      new GenericRow(Arrays.asList(1500962514814L, "2017-07-24 23:01:54.814", 1500962514814L)));
  Map<String, GenericRow> results = readNormalResults(streamName, resultSchema, expectedResults.size());
  assertThat(results, equalTo(expectedResults));
}
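The expected row only makes sense once you notice the round trip: STRINGTOTIMESTAMP(TIMESTAMPTOSTRING(x, fmt), fmt) returns the original millis, so the first and third columns are both 1500962514814L (which also implies ORDERTIME is 8 for the row keyed "8", since 1500962514806 + 8 = 1500962514814). A plain-Java illustration of that round trip, assuming the UDFs format in the JVM's default time zone as the hard-coded "2017-07-24 23:01:54.814" string suggests:

import java.text.SimpleDateFormat;
import java.util.Date;

public class TimestampRoundTrip {
  public static void main(String[] args) throws Exception {
    long original = 1500962514806L + 8L;               // ORDERTIME + offset for the row keyed "8"
    SimpleDateFormat fmt = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss.SSS");
    String asText = fmt.format(new Date(original));    // rendered in the JVM's default time zone
    long parsed = fmt.parse(asText).getTime();         // formatting then parsing restores the millis
    System.out.println(asText + " -> " + parsed);      // parsed == original == 1500962514814
  }
}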
use of io.confluent.ksql.GenericRow in project ksql by confluentinc.
the class JsonFormatTest method produceInitData.
private void produceInitData() throws Exception {
  OrderDataProvider orderDataProvider = new OrderDataProvider();
  topicProducer.produceInputData(inputTopic, orderDataProvider.data(), orderDataProvider.schema());
  Schema messageSchema = SchemaBuilder.struct().field("MESSAGE", SchemaBuilder.STRING_SCHEMA).build();
  GenericRow messageRow = new GenericRow(Collections.singletonList(
      "{\"log\":{\"@timestamp\":\"2017-05-30T16:44:22.175Z\",\"@version\":\"1\","
          + "\"caasVersion\":\"0.0.2\",\"cloud\":\"aws\",\"logs\":[{\"entry\":\"first\"}],\"clusterId\":\"cp99\",\"clusterName\":\"kafka\","
          + "\"cpComponentId\":\"kafka\",\"host\":\"kafka-1-wwl0p\",\"k8sId\":\"k8s13\",\"k8sName\":\"perf\",\"level\":\"ERROR\","
          + "\"logger\":\"kafka.server.ReplicaFetcherThread\",\"message\":\"Found invalid messages during fetch for partition [foo512,172] offset 0 error Record is corrupt (stored crc = 1321230880, computed crc = 1139143803)\","
          + "\"networkId\":\"vpc-d8c7a9bf\",\"region\":\"us-west-2\",\"serverId\":\"1\",\"skuId\":\"sku5\",\"source\":\"kafka\","
          + "\"tenantId\":\"t47\",\"tenantName\":\"perf-test\",\"thread\":\"ReplicaFetcherThread-0-2\",\"zone\":\"us-west-2a\"},\"stream\":\"stdout\",\"time\":2017}"));
  Map<String, GenericRow> records = new HashMap<>();
  records.put("1", messageRow);
  topicProducer.produceInputData(messageLogTopic, records, messageSchema);
}