Use of io.trino.testing.kafka.TestingKafka in project trino by trinodb.
Example: the main method of the PinotQueryRunner class.
public static void main(String[] args) throws Exception {
Logging.initialize();
TestingKafka kafka = TestingKafka.createWithSchemaRegistry();
kafka.start();
TestingPinotCluster pinot = new TestingPinotCluster(kafka.getNetwork(), false);
pinot.start();
Map<String, String> properties = ImmutableMap.of("http-server.http.port", "8080");
Map<String, String> pinotProperties = ImmutableMap.<String, String>builder()
        .put("pinot.controller-urls", pinot.getControllerConnectString())
        .put("pinot.segments-per-split", "10")
        .put("pinot.request-timeout", "3m")
        .buildOrThrow();
DistributedQueryRunner queryRunner = createPinotQueryRunner(properties, pinotProperties, Optional.empty());
Thread.sleep(10);
Logger log = Logger.get(PinotQueryRunner.class);
log.info("======== SERVER STARTED ========");
log.info("\n====\n%s\n====", queryRunner.getCoordinator().getBaseUrl());
}
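With http-server.http.port set to 8080, the coordinator of this standalone runner is reachable at localhost:8080, for example from the Trino CLI. The runner can also be queried programmatically; the lines below are a sketch that could be appended to main(), not part of the original file, and the pinot catalog and default schema names are assumptions about how createPinotQueryRunner mounts the connector.
// Hedged sketch: list the tables the Pinot connector exposes through the runner.
// MaterializedResult and MaterializedRow come from io.trino.testing.
MaterializedResult tables = queryRunner.execute("SHOW TABLES FROM pinot.default");
tables.getMaterializedRows().forEach(row -> log.info("table: %s", row.getField(0)));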
Use of io.trino.testing.kafka.TestingKafka in project trino by trinodb.
Example: the createQueryRunner method of the AbstractPinotIntegrationSmokeTest class.
@Override
protected QueryRunner createQueryRunner() throws Exception {
TestingKafka kafka = closeAfterClass(TestingKafka.createWithSchemaRegistry());
kafka.start();
TestingPinotCluster pinot = closeAfterClass(new TestingPinotCluster(kafka.getNetwork(), isSecured()));
pinot.start();
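// Each of the tables below is set up with the same pattern: create a Kafka topic, publish
// test records (Avro GenericRecords through the schema-registry-aware producer for most
// tables, plain JSON records for the json table), register the Pinot schema from a JSON
// resource, and add a realtime table spec that consumes the topic.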
// Create and populate the all_types topic and table
kafka.createTopic(ALL_TYPES_TABLE);
ImmutableList.Builder<ProducerRecord<String, GenericRecord>> allTypesRecordsBuilder = ImmutableList.builder();
for (int i = 0, step = 1200; i < MAX_ROWS_PER_SPLIT_FOR_SEGMENT_QUERIES - 2; i++) {
int offset = i * step;
allTypesRecordsBuilder.add(new ProducerRecord<>(ALL_TYPES_TABLE, "key" + i * step, createTestRecord(
        Arrays.asList("string_" + (offset), "string1_" + (offset + 1), "string2_" + (offset + 2)),
        true,
        Arrays.asList(54 + i / 3, -10001, 1000),
        Arrays.asList(-7.33F + i, Float.POSITIVE_INFINITY, 17.034F + i),
        Arrays.asList(-17.33D + i, Double.POSITIVE_INFINITY, 10596.034D + i),
        Arrays.asList(-3147483647L + i, 12L - i, 4147483647L + i),
        initialUpdatedAt.plusMillis(offset).toEpochMilli())));
}
allTypesRecordsBuilder.add(new ProducerRecord<>(ALL_TYPES_TABLE, null, createNullRecord()));
allTypesRecordsBuilder.add(new ProducerRecord<>(ALL_TYPES_TABLE, null, createArrayNullRecord()));
kafka.sendMessages(allTypesRecordsBuilder.build().stream(), schemaRegistryAwareProducer(kafka));
pinot.createSchema(getClass().getClassLoader().getResourceAsStream("alltypes_schema.json"), ALL_TYPES_TABLE);
pinot.addRealTimeTable(getClass().getClassLoader().getResourceAsStream("alltypes_realtimeSpec.json"), ALL_TYPES_TABLE);
// Create and populate mixed case table and topic
kafka.createTopic(MIXED_CASE_COLUMN_NAMES_TABLE);
Schema mixedCaseAvroSchema = SchemaBuilder.record(MIXED_CASE_COLUMN_NAMES_TABLE).fields().name("stringCol").type().stringType().noDefault().name("longCol").type().optional().longType().name("updatedAt").type().longType().noDefault().endRecord();
List<ProducerRecord<String, GenericRecord>> mixedCaseProducerRecords = ImmutableList.<ProducerRecord<String, GenericRecord>>builder()
        .add(new ProducerRecord<>(MIXED_CASE_COLUMN_NAMES_TABLE, "key0", new GenericRecordBuilder(mixedCaseAvroSchema)
                .set("stringCol", "string_0")
                .set("longCol", 0L)
                .set("updatedAt", initialUpdatedAt.toEpochMilli())
                .build()))
        .add(new ProducerRecord<>(MIXED_CASE_COLUMN_NAMES_TABLE, "key1", new GenericRecordBuilder(mixedCaseAvroSchema)
                .set("stringCol", "string_1")
                .set("longCol", 1L)
                .set("updatedAt", initialUpdatedAt.plusMillis(1000).toEpochMilli())
                .build()))
        .add(new ProducerRecord<>(MIXED_CASE_COLUMN_NAMES_TABLE, "key2", new GenericRecordBuilder(mixedCaseAvroSchema)
                .set("stringCol", "string_2")
                .set("longCol", 2L)
                .set("updatedAt", initialUpdatedAt.plusMillis(2000).toEpochMilli())
                .build()))
        .add(new ProducerRecord<>(MIXED_CASE_COLUMN_NAMES_TABLE, "key3", new GenericRecordBuilder(mixedCaseAvroSchema)
                .set("stringCol", "string_3")
                .set("longCol", 3L)
                .set("updatedAt", initialUpdatedAt.plusMillis(3000).toEpochMilli())
                .build()))
        .build();
kafka.sendMessages(mixedCaseProducerRecords.stream(), schemaRegistryAwareProducer(kafka));
pinot.createSchema(getClass().getClassLoader().getResourceAsStream("mixed_case_schema.json"), MIXED_CASE_COLUMN_NAMES_TABLE);
pinot.addRealTimeTable(getClass().getClassLoader().getResourceAsStream("mixed_case_realtimeSpec.json"), MIXED_CASE_COLUMN_NAMES_TABLE);
// Create and populate mixed case distinct table and topic
kafka.createTopic(MIXED_CASE_DISTINCT_TABLE);
Schema mixedCaseDistinctAvroSchema = SchemaBuilder.record(MIXED_CASE_DISTINCT_TABLE).fields().name("string_col").type().stringType().noDefault().name("updated_at").type().longType().noDefault().endRecord();
List<ProducerRecord<String, GenericRecord>> mixedCaseDistinctProducerRecords = ImmutableList.<ProducerRecord<String, GenericRecord>>builder()
        .add(new ProducerRecord<>(MIXED_CASE_DISTINCT_TABLE, "key0", new GenericRecordBuilder(mixedCaseDistinctAvroSchema)
                .set("string_col", "A")
                .set("updated_at", initialUpdatedAt.toEpochMilli())
                .build()))
        .add(new ProducerRecord<>(MIXED_CASE_DISTINCT_TABLE, "key1", new GenericRecordBuilder(mixedCaseDistinctAvroSchema)
                .set("string_col", "a")
                .set("updated_at", initialUpdatedAt.plusMillis(1000).toEpochMilli())
                .build()))
        .add(new ProducerRecord<>(MIXED_CASE_DISTINCT_TABLE, "key2", new GenericRecordBuilder(mixedCaseDistinctAvroSchema)
                .set("string_col", "B")
                .set("updated_at", initialUpdatedAt.plusMillis(2000).toEpochMilli())
                .build()))
        .add(new ProducerRecord<>(MIXED_CASE_DISTINCT_TABLE, "key3", new GenericRecordBuilder(mixedCaseDistinctAvroSchema)
                .set("string_col", "b")
                .set("updated_at", initialUpdatedAt.plusMillis(3000).toEpochMilli())
                .build()))
        .build();
kafka.sendMessages(mixedCaseDistinctProducerRecords.stream(), schemaRegistryAwareProducer(kafka));
pinot.createSchema(getClass().getClassLoader().getResourceAsStream("mixed_case_distinct_schema.json"), MIXED_CASE_DISTINCT_TABLE);
pinot.addRealTimeTable(getClass().getClassLoader().getResourceAsStream("mixed_case_distinct_realtimeSpec.json"), MIXED_CASE_DISTINCT_TABLE);
// Create and populate too many rows table and topic
kafka.createTopic(TOO_MANY_ROWS_TABLE);
Schema tooManyRowsAvroSchema = SchemaBuilder.record(TOO_MANY_ROWS_TABLE).fields().name("string_col").type().optional().stringType().name("updatedAt").type().optional().longType().endRecord();
ImmutableList.Builder<ProducerRecord<String, GenericRecord>> tooManyRowsRecordsBuilder = ImmutableList.builder();
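// Produce one row more than MAX_ROWS_PER_SPLIT_FOR_SEGMENT_QUERIES so the tests can trigger the connector's per-segment-split row limit.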
for (int i = 0; i < MAX_ROWS_PER_SPLIT_FOR_SEGMENT_QUERIES + 1; i++) {
tooManyRowsRecordsBuilder.add(new ProducerRecord<>(TOO_MANY_ROWS_TABLE, "key" + i, new GenericRecordBuilder(tooManyRowsAvroSchema).set("string_col", "string_" + i).set("updatedAt", initialUpdatedAt.plusMillis(i * 1000).toEpochMilli()).build()));
}
// Add a null row and verify that it was not ingested, since Pinot does not accept null time column values.
// The data is verified in testBrokerQueryWithTooManyRowsForSegmentQuery.
tooManyRowsRecordsBuilder.add(new ProducerRecord<>(TOO_MANY_ROWS_TABLE, "key" + MAX_ROWS_PER_SPLIT_FOR_SEGMENT_QUERIES, new GenericRecordBuilder(tooManyRowsAvroSchema).build()));
kafka.sendMessages(tooManyRowsRecordsBuilder.build().stream(), schemaRegistryAwareProducer(kafka));
pinot.createSchema(getClass().getClassLoader().getResourceAsStream("too_many_rows_schema.json"), TOO_MANY_ROWS_TABLE);
pinot.addRealTimeTable(getClass().getClassLoader().getResourceAsStream("too_many_rows_realtimeSpec.json"), TOO_MANY_ROWS_TABLE);
// Create and populate too many broker rows table and topic
kafka.createTopic(TOO_MANY_BROKER_ROWS_TABLE);
Schema tooManyBrokerRowsAvroSchema = SchemaBuilder.record(TOO_MANY_BROKER_ROWS_TABLE).fields().name("string_col").type().optional().stringType().name("updatedAt").type().optional().longType().endRecord();
ImmutableList.Builder<ProducerRecord<String, GenericRecord>> tooManyBrokerRowsRecordsBuilder = ImmutableList.builder();
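// Likewise, produce one row more than MAX_ROWS_PER_SPLIT_FOR_BROKER_QUERIES to trigger the row limit applied to broker queries.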
for (int i = 0; i < MAX_ROWS_PER_SPLIT_FOR_BROKER_QUERIES + 1; i++) {
tooManyBrokerRowsRecordsBuilder.add(new ProducerRecord<>(TOO_MANY_BROKER_ROWS_TABLE, "key" + i, new GenericRecordBuilder(tooManyBrokerRowsAvroSchema).set("string_col", "string_" + i).set("updatedAt", initialUpdatedAt.plusMillis(i * 1000).toEpochMilli()).build()));
}
kafka.sendMessages(tooManyBrokerRowsRecordsBuilder.build().stream(), schemaRegistryAwareProducer(kafka));
pinot.createSchema(getClass().getClassLoader().getResourceAsStream("too_many_broker_rows_schema.json"), TOO_MANY_BROKER_ROWS_TABLE);
pinot.addRealTimeTable(getClass().getClassLoader().getResourceAsStream("too_many_broker_rows_realtimeSpec.json"), TOO_MANY_BROKER_ROWS_TABLE);
// Create and populate date time fields table and topic
kafka.createTopic(DATE_TIME_FIELDS_TABLE);
Schema dateTimeFieldsAvroSchema = SchemaBuilder.record(DATE_TIME_FIELDS_TABLE).fields().name("string_col").type().stringType().noDefault().name("created_at").type().longType().noDefault().name("updated_at").type().longType().noDefault().endRecord();
List<ProducerRecord<String, GenericRecord>> dateTimeFieldsProducerRecords = ImmutableList.<ProducerRecord<String, GenericRecord>>builder()
        .add(new ProducerRecord<>(DATE_TIME_FIELDS_TABLE, "string_0", new GenericRecordBuilder(dateTimeFieldsAvroSchema)
                .set("string_col", "string_0")
                .set("created_at", CREATED_AT_INSTANT.toEpochMilli())
                .set("updated_at", initialUpdatedAt.toEpochMilli())
                .build()))
        .add(new ProducerRecord<>(DATE_TIME_FIELDS_TABLE, "string_1", new GenericRecordBuilder(dateTimeFieldsAvroSchema)
                .set("string_col", "string_1")
                .set("created_at", CREATED_AT_INSTANT.plusMillis(1000).toEpochMilli())
                .set("updated_at", initialUpdatedAt.plusMillis(1000).toEpochMilli())
                .build()))
        .add(new ProducerRecord<>(DATE_TIME_FIELDS_TABLE, "string_2", new GenericRecordBuilder(dateTimeFieldsAvroSchema)
                .set("string_col", "string_2")
                .set("created_at", CREATED_AT_INSTANT.plusMillis(2000).toEpochMilli())
                .set("updated_at", initialUpdatedAt.plusMillis(2000).toEpochMilli())
                .build()))
        .build();
kafka.sendMessages(dateTimeFieldsProducerRecords.stream(), schemaRegistryAwareProducer(kafka));
pinot.createSchema(getClass().getClassLoader().getResourceAsStream("date_time_fields_schema.json"), DATE_TIME_FIELDS_TABLE);
pinot.addRealTimeTable(getClass().getClassLoader().getResourceAsStream("date_time_fields_realtimeSpec.json"), DATE_TIME_FIELDS_TABLE);
// Create json table
kafka.createTopic(JSON_TABLE);
long key = 0L;
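// Unlike the Avro-backed tables above, these records are built with TestingJsonRecord and sent without the schema-registry-aware producer; the keys are plain longs.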
kafka.sendMessages(Stream.of(
        new ProducerRecord<>(JSON_TABLE, key++, TestingJsonRecord.of("vendor1", "Los Angeles", Arrays.asList("foo1", "bar1", "baz1"), Arrays.asList(5, 6, 7), Arrays.asList(3.5F, 5.5F), Arrays.asList(10_000.5D, 20_000.335D, -3.7D), Arrays.asList(10_000L, 20_000_000L, -37L), 4)),
        new ProducerRecord<>(JSON_TABLE, key++, TestingJsonRecord.of("vendor2", "New York", Arrays.asList("foo2", "bar1", "baz1"), Arrays.asList(6, 7, 8), Arrays.asList(4.5F, 6.5F), Arrays.asList(10_000.5D, 20_000.335D, -3.7D), Arrays.asList(10_000L, 20_000_000L, -37L), 6)),
        new ProducerRecord<>(JSON_TABLE, key++, TestingJsonRecord.of("vendor3", "Los Angeles", Arrays.asList("foo3", "bar2", "baz1"), Arrays.asList(7, 8, 9), Arrays.asList(5.5F, 7.5F), Arrays.asList(10_000.5D, 20_000.335D, -3.7D), Arrays.asList(10_000L, 20_000_000L, -37L), 8)),
        new ProducerRecord<>(JSON_TABLE, key++, TestingJsonRecord.of("vendor4", "New York", Arrays.asList("foo4", "bar2", "baz2"), Arrays.asList(8, 9, 10), Arrays.asList(6.5F, 8.5F), Arrays.asList(10_000.5D, 20_000.335D, -3.7D), Arrays.asList(10_000L, 20_000_000L, -37L), 10)),
        new ProducerRecord<>(JSON_TABLE, key++, TestingJsonRecord.of("vendor5", "Los Angeles", Arrays.asList("foo5", "bar3", "baz2"), Arrays.asList(9, 10, 11), Arrays.asList(7.5F, 9.5F), Arrays.asList(10_000.5D, 20_000.335D, -3.7D), Arrays.asList(10_000L, 20_000_000L, -37L), 12)),
        new ProducerRecord<>(JSON_TABLE, key++, TestingJsonRecord.of("vendor6", "Los Angeles", Arrays.asList("foo6", "bar3", "baz2"), Arrays.asList(10, 11, 12), Arrays.asList(8.5F, 10.5F), Arrays.asList(10_000.5D, 20_000.335D, -3.7D), Arrays.asList(10_000L, 20_000_000L, -37L), 12)),
        new ProducerRecord<>(JSON_TABLE, key, TestingJsonRecord.of("vendor7", "Los Angeles", Arrays.asList("foo6", "bar3", "baz2"), Arrays.asList(10, 11, 12), Arrays.asList(9.5F, 10.5F), Arrays.asList(10_000.5D, 20_000.335D, -3.7D), Arrays.asList(10_000L, 20_000_000L, -37L), 12))));
pinot.createSchema(getClass().getClassLoader().getResourceAsStream("schema.json"), JSON_TABLE);
pinot.addRealTimeTable(getClass().getClassLoader().getResourceAsStream("realtimeSpec.json"), JSON_TABLE);
// Create a table having reserved keyword column names
kafka.createTopic(RESERVED_KEYWORD_TABLE);
Schema reservedKeywordAvroSchema = SchemaBuilder.record(RESERVED_KEYWORD_TABLE).fields().name("date").type().optional().stringType().name("as").type().optional().stringType().name("updatedAt").type().optional().longType().endRecord();
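// "date" and "as" are SQL reserved words, presumably chosen so the tests cover identifier quoting.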
ImmutableList.Builder<ProducerRecord<String, GenericRecord>> reservedKeywordRecordsBuilder = ImmutableList.builder();
reservedKeywordRecordsBuilder.add(new ProducerRecord<>(RESERVED_KEYWORD_TABLE, "key0", new GenericRecordBuilder(reservedKeywordAvroSchema).set("date", "2021-09-30").set("as", "foo").set("updatedAt", initialUpdatedAt.plusMillis(1000).toEpochMilli()).build()));
reservedKeywordRecordsBuilder.add(new ProducerRecord<>(RESERVED_KEYWORD_TABLE, "key1", new GenericRecordBuilder(reservedKeywordAvroSchema).set("date", "2021-10-01").set("as", "bar").set("updatedAt", initialUpdatedAt.plusMillis(2000).toEpochMilli()).build()));
kafka.sendMessages(reservedKeywordRecordsBuilder.build().stream(), schemaRegistryAwareProducer(kafka));
pinot.createSchema(getClass().getClassLoader().getResourceAsStream("reserved_keyword_schema.json"), RESERVED_KEYWORD_TABLE);
pinot.addRealTimeTable(getClass().getClassLoader().getResourceAsStream("reserved_keyword_realtimeSpec.json"), RESERVED_KEYWORD_TABLE);
// Create a table having quotes in column names
kafka.createTopic(QUOTES_IN_COLUMN_NAME_TABLE);
Schema quotesInColumnNameAvroSchema = SchemaBuilder.record(QUOTES_IN_COLUMN_NAME_TABLE).fields().name("non_quoted").type().optional().stringType().name("updatedAt").type().optional().longType().endRecord();
ImmutableList.Builder<ProducerRecord<String, GenericRecord>> quotesInColumnNameRecordsBuilder = ImmutableList.builder();
quotesInColumnNameRecordsBuilder.add(new ProducerRecord<>(QUOTES_IN_COLUMN_NAME_TABLE, "key0", new GenericRecordBuilder(quotesInColumnNameAvroSchema).set("non_quoted", "Foo").set("updatedAt", initialUpdatedAt.plusMillis(1000).toEpochMilli()).build()));
quotesInColumnNameRecordsBuilder.add(new ProducerRecord<>(QUOTES_IN_COLUMN_NAME_TABLE, "key1", new GenericRecordBuilder(quotesInColumnNameAvroSchema).set("non_quoted", "Bar").set("updatedAt", initialUpdatedAt.plusMillis(2000).toEpochMilli()).build()));
kafka.sendMessages(quotesInColumnNameRecordsBuilder.build().stream(), schemaRegistryAwareProducer(kafka));
pinot.createSchema(getClass().getClassLoader().getResourceAsStream("quotes_in_column_name_schema.json"), QUOTES_IN_COLUMN_NAME_TABLE);
pinot.addRealTimeTable(getClass().getClassLoader().getResourceAsStream("quotes_in_column_name_realtimeSpec.json"), QUOTES_IN_COLUMN_NAME_TABLE);
// Create a table having multiple columns with duplicate values
kafka.createTopic(DUPLICATE_VALUES_IN_COLUMNS_TABLE);
Schema duplicateValuesInColumnsAvroSchema = SchemaBuilder.record(DUPLICATE_VALUES_IN_COLUMNS_TABLE).fields()
        .name("dim_col").type().optional().longType()
        .name("another_dim_col").type().optional().longType()
        .name("string_col").type().optional().stringType()
        .name("another_string_col").type().optional().stringType()
        .name("metric_col1").type().optional().longType()
        .name("metric_col2").type().optional().longType()
        .name("updated_at").type().longType().noDefault()
        .endRecord();
ImmutableList.Builder<ProducerRecord<String, GenericRecord>> duplicateValuesInColumnsRecordsBuilder = ImmutableList.builder();
duplicateValuesInColumnsRecordsBuilder.add(new ProducerRecord<>(DUPLICATE_VALUES_IN_COLUMNS_TABLE, "key0", new GenericRecordBuilder(duplicateValuesInColumnsAvroSchema).set("dim_col", 1000L).set("another_dim_col", 1000L).set("string_col", "string1").set("another_string_col", "string1").set("metric_col1", 10L).set("metric_col2", 20L).set("updated_at", initialUpdatedAt.plusMillis(1000).toEpochMilli()).build()));
duplicateValuesInColumnsRecordsBuilder.add(new ProducerRecord<>(DUPLICATE_VALUES_IN_COLUMNS_TABLE, "key1", new GenericRecordBuilder(duplicateValuesInColumnsAvroSchema).set("dim_col", 2000L).set("another_dim_col", 2000L).set("string_col", "string1").set("another_string_col", "string1").set("metric_col1", 100L).set("metric_col2", 200L).set("updated_at", initialUpdatedAt.plusMillis(2000).toEpochMilli()).build()));
duplicateValuesInColumnsRecordsBuilder.add(new ProducerRecord<>(DUPLICATE_VALUES_IN_COLUMNS_TABLE, "key2", new GenericRecordBuilder(duplicateValuesInColumnsAvroSchema).set("dim_col", 3000L).set("another_dim_col", 3000L).set("string_col", "string1").set("another_string_col", "another_string1").set("metric_col1", 1000L).set("metric_col2", 2000L).set("updated_at", initialUpdatedAt.plusMillis(3000).toEpochMilli()).build()));
duplicateValuesInColumnsRecordsBuilder.add(new ProducerRecord<>(DUPLICATE_VALUES_IN_COLUMNS_TABLE, "key1", new GenericRecordBuilder(duplicateValuesInColumnsAvroSchema).set("dim_col", 4000L).set("another_dim_col", 4000L).set("string_col", "string2").set("another_string_col", "another_string2").set("metric_col1", 100L).set("metric_col2", 200L).set("updated_at", initialUpdatedAt.plusMillis(4000).toEpochMilli()).build()));
duplicateValuesInColumnsRecordsBuilder.add(new ProducerRecord<>(DUPLICATE_VALUES_IN_COLUMNS_TABLE, "key2", new GenericRecordBuilder(duplicateValuesInColumnsAvroSchema).set("dim_col", 4000L).set("another_dim_col", 4001L).set("string_col", "string2").set("another_string_col", "string2").set("metric_col1", 1000L).set("metric_col2", 2000L).set("updated_at", initialUpdatedAt.plusMillis(5000).toEpochMilli()).build()));
kafka.sendMessages(duplicateValuesInColumnsRecordsBuilder.build().stream(), schemaRegistryAwareProducer(kafka));
pinot.createSchema(getClass().getClassLoader().getResourceAsStream("duplicate_values_in_columns_schema.json"), DUPLICATE_VALUES_IN_COLUMNS_TABLE);
pinot.addRealTimeTable(getClass().getClassLoader().getResourceAsStream("duplicate_values_in_columns_realtimeSpec.json"), DUPLICATE_VALUES_IN_COLUMNS_TABLE);
return PinotQueryRunner.createPinotQueryRunner(ImmutableMap.of(), pinotProperties(pinot), Optional.of(binder -> newOptionalBinder(binder, PinotHostMapper.class).setBinding().toInstance(new TestingPinotHostMapper(pinot.getBrokerHostAndPort(), pinot.getServerHostAndPort()))));
}
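Tests extending AbstractPinotIntegrationSmokeTest then query the tables created above through the AbstractTestQueryFramework helpers. The method below is an illustrative sketch only, not taken from the class; it assumes assertQuery runs the expected SQL against the framework's verifier runner and that the reserved_keyword table exposes both columns as strings.
@Test
public void testReservedKeywordColumnNames() {
    // Illustrative sketch: read back the two rows produced for the reserved_keyword
    // table; the column names are quoted because "date" and "as" are reserved words.
    assertQuery(
            "SELECT \"date\", \"as\" FROM " + RESERVED_KEYWORD_TABLE,
            "VALUES ('2021-09-30', 'foo'), ('2021-10-01', 'bar')");
}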