Example 26 with QueryRunner

use of io.trino.testing.QueryRunner in project trino by trinodb.

the class AbstractPinotIntegrationSmokeTest method createQueryRunner.

@Override
protected QueryRunner createQueryRunner() throws Exception {
    TestingKafka kafka = closeAfterClass(TestingKafka.createWithSchemaRegistry());
    kafka.start();
    TestingPinotCluster pinot = closeAfterClass(new TestingPinotCluster(kafka.getNetwork(), isSecured()));
    pinot.start();
    // Create and populate the all_types topic and table
    kafka.createTopic(ALL_TYPES_TABLE);
    ImmutableList.Builder<ProducerRecord<String, GenericRecord>> allTypesRecordsBuilder = ImmutableList.builder();
    for (int i = 0, step = 1200; i < MAX_ROWS_PER_SPLIT_FOR_SEGMENT_QUERIES - 2; i++) {
        int offset = i * step;
        allTypesRecordsBuilder.add(new ProducerRecord<>(ALL_TYPES_TABLE, "key" + i * step, createTestRecord(
                Arrays.asList("string_" + (offset), "string1_" + (offset + 1), "string2_" + (offset + 2)),
                true,
                Arrays.asList(54 + i / 3, -10001, 1000),
                Arrays.asList(-7.33F + i, Float.POSITIVE_INFINITY, 17.034F + i),
                Arrays.asList(-17.33D + i, Double.POSITIVE_INFINITY, 10596.034D + i),
                Arrays.asList(-3147483647L + i, 12L - i, 4147483647L + i),
                initialUpdatedAt.plusMillis(offset).toEpochMilli())));
    }
    allTypesRecordsBuilder.add(new ProducerRecord<>(ALL_TYPES_TABLE, null, createNullRecord()));
    allTypesRecordsBuilder.add(new ProducerRecord<>(ALL_TYPES_TABLE, null, createArrayNullRecord()));
    kafka.sendMessages(allTypesRecordsBuilder.build().stream(), schemaRegistryAwareProducer(kafka));
    pinot.createSchema(getClass().getClassLoader().getResourceAsStream("alltypes_schema.json"), ALL_TYPES_TABLE);
    pinot.addRealTimeTable(getClass().getClassLoader().getResourceAsStream("alltypes_realtimeSpec.json"), ALL_TYPES_TABLE);
    // Create and populate mixed case table and topic
    kafka.createTopic(MIXED_CASE_COLUMN_NAMES_TABLE);
    Schema mixedCaseAvroSchema = SchemaBuilder.record(MIXED_CASE_COLUMN_NAMES_TABLE).fields()
            .name("stringCol").type().stringType().noDefault()
            .name("longCol").type().optional().longType()
            .name("updatedAt").type().longType().noDefault()
            .endRecord();
    List<ProducerRecord<String, GenericRecord>> mixedCaseProducerRecords = ImmutableList.<ProducerRecord<String, GenericRecord>>builder()
            .add(new ProducerRecord<>(MIXED_CASE_COLUMN_NAMES_TABLE, "key0", new GenericRecordBuilder(mixedCaseAvroSchema).set("stringCol", "string_0").set("longCol", 0L).set("updatedAt", initialUpdatedAt.toEpochMilli()).build()))
            .add(new ProducerRecord<>(MIXED_CASE_COLUMN_NAMES_TABLE, "key1", new GenericRecordBuilder(mixedCaseAvroSchema).set("stringCol", "string_1").set("longCol", 1L).set("updatedAt", initialUpdatedAt.plusMillis(1000).toEpochMilli()).build()))
            .add(new ProducerRecord<>(MIXED_CASE_COLUMN_NAMES_TABLE, "key2", new GenericRecordBuilder(mixedCaseAvroSchema).set("stringCol", "string_2").set("longCol", 2L).set("updatedAt", initialUpdatedAt.plusMillis(2000).toEpochMilli()).build()))
            .add(new ProducerRecord<>(MIXED_CASE_COLUMN_NAMES_TABLE, "key3", new GenericRecordBuilder(mixedCaseAvroSchema).set("stringCol", "string_3").set("longCol", 3L).set("updatedAt", initialUpdatedAt.plusMillis(3000).toEpochMilli()).build()))
            .build();
    kafka.sendMessages(mixedCaseProducerRecords.stream(), schemaRegistryAwareProducer(kafka));
    pinot.createSchema(getClass().getClassLoader().getResourceAsStream("mixed_case_schema.json"), MIXED_CASE_COLUMN_NAMES_TABLE);
    pinot.addRealTimeTable(getClass().getClassLoader().getResourceAsStream("mixed_case_realtimeSpec.json"), MIXED_CASE_COLUMN_NAMES_TABLE);
    // Create and populate mixed case distinct table and topic
    kafka.createTopic(MIXED_CASE_DISTINCT_TABLE);
    Schema mixedCaseDistinctAvroSchema = SchemaBuilder.record(MIXED_CASE_DISTINCT_TABLE).fields()
            .name("string_col").type().stringType().noDefault()
            .name("updated_at").type().longType().noDefault()
            .endRecord();
    List<ProducerRecord<String, GenericRecord>> mixedCaseDistinctProducerRecords = ImmutableList.<ProducerRecord<String, GenericRecord>>builder()
            .add(new ProducerRecord<>(MIXED_CASE_DISTINCT_TABLE, "key0", new GenericRecordBuilder(mixedCaseDistinctAvroSchema).set("string_col", "A").set("updated_at", initialUpdatedAt.toEpochMilli()).build()))
            .add(new ProducerRecord<>(MIXED_CASE_DISTINCT_TABLE, "key1", new GenericRecordBuilder(mixedCaseDistinctAvroSchema).set("string_col", "a").set("updated_at", initialUpdatedAt.plusMillis(1000).toEpochMilli()).build()))
            .add(new ProducerRecord<>(MIXED_CASE_DISTINCT_TABLE, "key2", new GenericRecordBuilder(mixedCaseDistinctAvroSchema).set("string_col", "B").set("updated_at", initialUpdatedAt.plusMillis(2000).toEpochMilli()).build()))
            .add(new ProducerRecord<>(MIXED_CASE_DISTINCT_TABLE, "key3", new GenericRecordBuilder(mixedCaseDistinctAvroSchema).set("string_col", "b").set("updated_at", initialUpdatedAt.plusMillis(3000).toEpochMilli()).build()))
            .build();
    kafka.sendMessages(mixedCaseDistinctProducerRecords.stream(), schemaRegistryAwareProducer(kafka));
    pinot.createSchema(getClass().getClassLoader().getResourceAsStream("mixed_case_distinct_schema.json"), MIXED_CASE_DISTINCT_TABLE);
    pinot.addRealTimeTable(getClass().getClassLoader().getResourceAsStream("mixed_case_distinct_realtimeSpec.json"), MIXED_CASE_DISTINCT_TABLE);
    // Create and populate too many rows table and topic
    kafka.createTopic(TOO_MANY_ROWS_TABLE);
    Schema tooManyRowsAvroSchema = SchemaBuilder.record(TOO_MANY_ROWS_TABLE).fields()
            .name("string_col").type().optional().stringType()
            .name("updatedAt").type().optional().longType()
            .endRecord();
    ImmutableList.Builder<ProducerRecord<String, GenericRecord>> tooManyRowsRecordsBuilder = ImmutableList.builder();
    for (int i = 0; i < MAX_ROWS_PER_SPLIT_FOR_SEGMENT_QUERIES + 1; i++) {
        tooManyRowsRecordsBuilder.add(new ProducerRecord<>(TOO_MANY_ROWS_TABLE, "key" + i, new GenericRecordBuilder(tooManyRowsAvroSchema).set("string_col", "string_" + i).set("updatedAt", initialUpdatedAt.plusMillis(i * 1000).toEpochMilli()).build()));
    }
    // Add a null row and verify it is not ingested, as Pinot does not accept null time column values.
    // The data is verified in testBrokerQueryWithTooManyRowsForSegmentQuery
    tooManyRowsRecordsBuilder.add(new ProducerRecord<>(TOO_MANY_ROWS_TABLE, "key" + MAX_ROWS_PER_SPLIT_FOR_SEGMENT_QUERIES, new GenericRecordBuilder(tooManyRowsAvroSchema).build()));
    kafka.sendMessages(tooManyRowsRecordsBuilder.build().stream(), schemaRegistryAwareProducer(kafka));
    pinot.createSchema(getClass().getClassLoader().getResourceAsStream("too_many_rows_schema.json"), TOO_MANY_ROWS_TABLE);
    pinot.addRealTimeTable(getClass().getClassLoader().getResourceAsStream("too_many_rows_realtimeSpec.json"), TOO_MANY_ROWS_TABLE);
    // Create and populate too many broker rows table and topic
    kafka.createTopic(TOO_MANY_BROKER_ROWS_TABLE);
    Schema tooManyBrokerRowsAvroSchema = SchemaBuilder.record(TOO_MANY_BROKER_ROWS_TABLE).fields()
            .name("string_col").type().optional().stringType()
            .name("updatedAt").type().optional().longType()
            .endRecord();
    ImmutableList.Builder<ProducerRecord<String, GenericRecord>> tooManyBrokerRowsRecordsBuilder = ImmutableList.builder();
    for (int i = 0; i < MAX_ROWS_PER_SPLIT_FOR_BROKER_QUERIES + 1; i++) {
        tooManyBrokerRowsRecordsBuilder.add(new ProducerRecord<>(TOO_MANY_BROKER_ROWS_TABLE, "key" + i, new GenericRecordBuilder(tooManyBrokerRowsAvroSchema).set("string_col", "string_" + i).set("updatedAt", initialUpdatedAt.plusMillis(i * 1000).toEpochMilli()).build()));
    }
    kafka.sendMessages(tooManyBrokerRowsRecordsBuilder.build().stream(), schemaRegistryAwareProducer(kafka));
    pinot.createSchema(getClass().getClassLoader().getResourceAsStream("too_many_broker_rows_schema.json"), TOO_MANY_BROKER_ROWS_TABLE);
    pinot.addRealTimeTable(getClass().getClassLoader().getResourceAsStream("too_many_broker_rows_realtimeSpec.json"), TOO_MANY_BROKER_ROWS_TABLE);
    // Create and populate date time fields table and topic
    kafka.createTopic(DATE_TIME_FIELDS_TABLE);
    Schema dateTimeFieldsAvroSchema = SchemaBuilder.record(DATE_TIME_FIELDS_TABLE).fields()
            .name("string_col").type().stringType().noDefault()
            .name("created_at").type().longType().noDefault()
            .name("updated_at").type().longType().noDefault()
            .endRecord();
    List<ProducerRecord<String, GenericRecord>> dateTimeFieldsProducerRecords = ImmutableList.<ProducerRecord<String, GenericRecord>>builder()
            .add(new ProducerRecord<>(DATE_TIME_FIELDS_TABLE, "string_0", new GenericRecordBuilder(dateTimeFieldsAvroSchema).set("string_col", "string_0").set("created_at", CREATED_AT_INSTANT.toEpochMilli()).set("updated_at", initialUpdatedAt.toEpochMilli()).build()))
            .add(new ProducerRecord<>(DATE_TIME_FIELDS_TABLE, "string_1", new GenericRecordBuilder(dateTimeFieldsAvroSchema).set("string_col", "string_1").set("created_at", CREATED_AT_INSTANT.plusMillis(1000).toEpochMilli()).set("updated_at", initialUpdatedAt.plusMillis(1000).toEpochMilli()).build()))
            .add(new ProducerRecord<>(DATE_TIME_FIELDS_TABLE, "string_2", new GenericRecordBuilder(dateTimeFieldsAvroSchema).set("string_col", "string_2").set("created_at", CREATED_AT_INSTANT.plusMillis(2000).toEpochMilli()).set("updated_at", initialUpdatedAt.plusMillis(2000).toEpochMilli()).build()))
            .build();
    kafka.sendMessages(dateTimeFieldsProducerRecords.stream(), schemaRegistryAwareProducer(kafka));
    pinot.createSchema(getClass().getClassLoader().getResourceAsStream("date_time_fields_schema.json"), DATE_TIME_FIELDS_TABLE);
    pinot.addRealTimeTable(getClass().getClassLoader().getResourceAsStream("date_time_fields_realtimeSpec.json"), DATE_TIME_FIELDS_TABLE);
    // Create json table
    kafka.createTopic(JSON_TABLE);
    long key = 0L;
    kafka.sendMessages(Stream.of(
            new ProducerRecord<>(JSON_TABLE, key++, TestingJsonRecord.of("vendor1", "Los Angeles", Arrays.asList("foo1", "bar1", "baz1"), Arrays.asList(5, 6, 7), Arrays.asList(3.5F, 5.5F), Arrays.asList(10_000.5D, 20_000.335D, -3.7D), Arrays.asList(10_000L, 20_000_000L, -37L), 4)),
            new ProducerRecord<>(JSON_TABLE, key++, TestingJsonRecord.of("vendor2", "New York", Arrays.asList("foo2", "bar1", "baz1"), Arrays.asList(6, 7, 8), Arrays.asList(4.5F, 6.5F), Arrays.asList(10_000.5D, 20_000.335D, -3.7D), Arrays.asList(10_000L, 20_000_000L, -37L), 6)),
            new ProducerRecord<>(JSON_TABLE, key++, TestingJsonRecord.of("vendor3", "Los Angeles", Arrays.asList("foo3", "bar2", "baz1"), Arrays.asList(7, 8, 9), Arrays.asList(5.5F, 7.5F), Arrays.asList(10_000.5D, 20_000.335D, -3.7D), Arrays.asList(10_000L, 20_000_000L, -37L), 8)),
            new ProducerRecord<>(JSON_TABLE, key++, TestingJsonRecord.of("vendor4", "New York", Arrays.asList("foo4", "bar2", "baz2"), Arrays.asList(8, 9, 10), Arrays.asList(6.5F, 8.5F), Arrays.asList(10_000.5D, 20_000.335D, -3.7D), Arrays.asList(10_000L, 20_000_000L, -37L), 10)),
            new ProducerRecord<>(JSON_TABLE, key++, TestingJsonRecord.of("vendor5", "Los Angeles", Arrays.asList("foo5", "bar3", "baz2"), Arrays.asList(9, 10, 11), Arrays.asList(7.5F, 9.5F), Arrays.asList(10_000.5D, 20_000.335D, -3.7D), Arrays.asList(10_000L, 20_000_000L, -37L), 12)),
            new ProducerRecord<>(JSON_TABLE, key++, TestingJsonRecord.of("vendor6", "Los Angeles", Arrays.asList("foo6", "bar3", "baz2"), Arrays.asList(10, 11, 12), Arrays.asList(8.5F, 10.5F), Arrays.asList(10_000.5D, 20_000.335D, -3.7D), Arrays.asList(10_000L, 20_000_000L, -37L), 12)),
            new ProducerRecord<>(JSON_TABLE, key, TestingJsonRecord.of("vendor7", "Los Angeles", Arrays.asList("foo6", "bar3", "baz2"), Arrays.asList(10, 11, 12), Arrays.asList(9.5F, 10.5F), Arrays.asList(10_000.5D, 20_000.335D, -3.7D), Arrays.asList(10_000L, 20_000_000L, -37L), 12))));
    pinot.createSchema(getClass().getClassLoader().getResourceAsStream("schema.json"), JSON_TABLE);
    pinot.addRealTimeTable(getClass().getClassLoader().getResourceAsStream("realtimeSpec.json"), JSON_TABLE);
    // Create a table having reserved keyword column names
    kafka.createTopic(RESERVED_KEYWORD_TABLE);
    Schema reservedKeywordAvroSchema = SchemaBuilder.record(RESERVED_KEYWORD_TABLE).fields()
            .name("date").type().optional().stringType()
            .name("as").type().optional().stringType()
            .name("updatedAt").type().optional().longType()
            .endRecord();
    ImmutableList.Builder<ProducerRecord<String, GenericRecord>> reservedKeywordRecordsBuilder = ImmutableList.builder();
    reservedKeywordRecordsBuilder.add(new ProducerRecord<>(RESERVED_KEYWORD_TABLE, "key0", new GenericRecordBuilder(reservedKeywordAvroSchema).set("date", "2021-09-30").set("as", "foo").set("updatedAt", initialUpdatedAt.plusMillis(1000).toEpochMilli()).build()));
    reservedKeywordRecordsBuilder.add(new ProducerRecord<>(RESERVED_KEYWORD_TABLE, "key1", new GenericRecordBuilder(reservedKeywordAvroSchema).set("date", "2021-10-01").set("as", "bar").set("updatedAt", initialUpdatedAt.plusMillis(2000).toEpochMilli()).build()));
    kafka.sendMessages(reservedKeywordRecordsBuilder.build().stream(), schemaRegistryAwareProducer(kafka));
    pinot.createSchema(getClass().getClassLoader().getResourceAsStream("reserved_keyword_schema.json"), RESERVED_KEYWORD_TABLE);
    pinot.addRealTimeTable(getClass().getClassLoader().getResourceAsStream("reserved_keyword_realtimeSpec.json"), RESERVED_KEYWORD_TABLE);
    // Create a table having quotes in column names
    kafka.createTopic(QUOTES_IN_COLUMN_NAME_TABLE);
    Schema quotesInColumnNameAvroSchema = SchemaBuilder.record(QUOTES_IN_COLUMN_NAME_TABLE).fields()
            .name("non_quoted").type().optional().stringType()
            .name("updatedAt").type().optional().longType()
            .endRecord();
    ImmutableList.Builder<ProducerRecord<String, GenericRecord>> quotesInColumnNameRecordsBuilder = ImmutableList.builder();
    quotesInColumnNameRecordsBuilder.add(new ProducerRecord<>(QUOTES_IN_COLUMN_NAME_TABLE, "key0", new GenericRecordBuilder(quotesInColumnNameAvroSchema).set("non_quoted", "Foo").set("updatedAt", initialUpdatedAt.plusMillis(1000).toEpochMilli()).build()));
    quotesInColumnNameRecordsBuilder.add(new ProducerRecord<>(QUOTES_IN_COLUMN_NAME_TABLE, "key1", new GenericRecordBuilder(quotesInColumnNameAvroSchema).set("non_quoted", "Bar").set("updatedAt", initialUpdatedAt.plusMillis(2000).toEpochMilli()).build()));
    kafka.sendMessages(quotesInColumnNameRecordsBuilder.build().stream(), schemaRegistryAwareProducer(kafka));
    pinot.createSchema(getClass().getClassLoader().getResourceAsStream("quotes_in_column_name_schema.json"), QUOTES_IN_COLUMN_NAME_TABLE);
    pinot.addRealTimeTable(getClass().getClassLoader().getResourceAsStream("quotes_in_column_name_realtimeSpec.json"), QUOTES_IN_COLUMN_NAME_TABLE);
    // Create a table having multiple columns with duplicate values
    kafka.createTopic(DUPLICATE_VALUES_IN_COLUMNS_TABLE);
    Schema duplicateValuesInColumnsAvroSchema = SchemaBuilder.record(DUPLICATE_VALUES_IN_COLUMNS_TABLE).fields()
            .name("dim_col").type().optional().longType()
            .name("another_dim_col").type().optional().longType()
            .name("string_col").type().optional().stringType()
            .name("another_string_col").type().optional().stringType()
            .name("metric_col1").type().optional().longType()
            .name("metric_col2").type().optional().longType()
            .name("updated_at").type().longType().noDefault()
            .endRecord();
    ImmutableList.Builder<ProducerRecord<String, GenericRecord>> duplicateValuesInColumnsRecordsBuilder = ImmutableList.builder();
    duplicateValuesInColumnsRecordsBuilder.add(new ProducerRecord<>(DUPLICATE_VALUES_IN_COLUMNS_TABLE, "key0", new GenericRecordBuilder(duplicateValuesInColumnsAvroSchema).set("dim_col", 1000L).set("another_dim_col", 1000L).set("string_col", "string1").set("another_string_col", "string1").set("metric_col1", 10L).set("metric_col2", 20L).set("updated_at", initialUpdatedAt.plusMillis(1000).toEpochMilli()).build()));
    duplicateValuesInColumnsRecordsBuilder.add(new ProducerRecord<>(DUPLICATE_VALUES_IN_COLUMNS_TABLE, "key1", new GenericRecordBuilder(duplicateValuesInColumnsAvroSchema).set("dim_col", 2000L).set("another_dim_col", 2000L).set("string_col", "string1").set("another_string_col", "string1").set("metric_col1", 100L).set("metric_col2", 200L).set("updated_at", initialUpdatedAt.plusMillis(2000).toEpochMilli()).build()));
    duplicateValuesInColumnsRecordsBuilder.add(new ProducerRecord<>(DUPLICATE_VALUES_IN_COLUMNS_TABLE, "key2", new GenericRecordBuilder(duplicateValuesInColumnsAvroSchema).set("dim_col", 3000L).set("another_dim_col", 3000L).set("string_col", "string1").set("another_string_col", "another_string1").set("metric_col1", 1000L).set("metric_col2", 2000L).set("updated_at", initialUpdatedAt.plusMillis(3000).toEpochMilli()).build()));
    duplicateValuesInColumnsRecordsBuilder.add(new ProducerRecord<>(DUPLICATE_VALUES_IN_COLUMNS_TABLE, "key1", new GenericRecordBuilder(duplicateValuesInColumnsAvroSchema).set("dim_col", 4000L).set("another_dim_col", 4000L).set("string_col", "string2").set("another_string_col", "another_string2").set("metric_col1", 100L).set("metric_col2", 200L).set("updated_at", initialUpdatedAt.plusMillis(4000).toEpochMilli()).build()));
    duplicateValuesInColumnsRecordsBuilder.add(new ProducerRecord<>(DUPLICATE_VALUES_IN_COLUMNS_TABLE, "key2", new GenericRecordBuilder(duplicateValuesInColumnsAvroSchema).set("dim_col", 4000L).set("another_dim_col", 4001L).set("string_col", "string2").set("another_string_col", "string2").set("metric_col1", 1000L).set("metric_col2", 2000L).set("updated_at", initialUpdatedAt.plusMillis(5000).toEpochMilli()).build()));
    kafka.sendMessages(duplicateValuesInColumnsRecordsBuilder.build().stream(), schemaRegistryAwareProducer(kafka));
    pinot.createSchema(getClass().getClassLoader().getResourceAsStream("duplicate_values_in_columns_schema.json"), DUPLICATE_VALUES_IN_COLUMNS_TABLE);
    pinot.addRealTimeTable(getClass().getClassLoader().getResourceAsStream("duplicate_values_in_columns_realtimeSpec.json"), DUPLICATE_VALUES_IN_COLUMNS_TABLE);
    return PinotQueryRunner.createPinotQueryRunner(
            ImmutableMap.of(),
            pinotProperties(pinot),
            Optional.of(binder -> newOptionalBinder(binder, PinotHostMapper.class)
                    .setBinding()
                    .toInstance(new TestingPinotHostMapper(pinot.getBrokerHostAndPort(), pinot.getServerHostAndPort()))));
}
Also used : JsonProperty(com.fasterxml.jackson.annotation.JsonProperty) Arrays(java.util.Arrays) KafkaAvroSerializer(io.confluent.kafka.serializers.KafkaAvroSerializer) MaterializedResult(io.trino.testing.MaterializedResult) Assertions.assertThat(org.assertj.core.api.Assertions.assertThat) Test(org.testng.annotations.Test) FilterNode(io.trino.sql.planner.plan.FilterNode) KEY_SERIALIZER_CLASS_CONFIG(org.apache.kafka.clients.producer.ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG) AbstractTestQueryFramework(io.trino.testing.AbstractTestQueryFramework) Duration(java.time.Duration) Map(java.util.Map) StringSerializer(org.apache.kafka.common.serialization.StringSerializer) OptionalBinder.newOptionalBinder(com.google.inject.multibindings.OptionalBinder.newOptionalBinder) MarkDistinctNode(io.trino.sql.planner.plan.MarkDistinctNode) Schema(org.apache.avro.Schema) ImmutableMap(com.google.common.collect.ImmutableMap) Instant(java.time.Instant) StandardCharsets(java.nio.charset.StandardCharsets) String.format(java.lang.String.format) Collectors.joining(java.util.stream.Collectors.joining) List(java.util.List) Stream(java.util.stream.Stream) TestingKafka(io.trino.testing.kafka.TestingKafka) Optional(java.util.Optional) ExchangeNode(io.trino.sql.planner.plan.ExchangeNode) REAL(io.trino.spi.type.RealType.REAL) Session(io.trino.Session) GenericRecordBuilder(org.apache.avro.generic.GenericRecordBuilder) ProducerRecord(org.apache.kafka.clients.producer.ProducerRecord) Hex(org.testcontainers.shaded.org.bouncycastle.util.encoders.Hex) SCHEMA_REGISTRY_URL_CONFIG(io.confluent.kafka.serializers.AbstractKafkaSchemaSerDeConfig.SCHEMA_REGISTRY_URL_CONFIG) LimitNode(io.trino.sql.planner.plan.LimitNode) Assert.assertEquals(org.testng.Assert.assertEquals) ArrayList(java.util.ArrayList) ImmutableList(com.google.common.collect.ImmutableList) Assertions.assertThatThrownBy(org.assertj.core.api.Assertions.assertThatThrownBy) Objects.requireNonNull(java.util.Objects.requireNonNull) Assertions.assertThatExceptionOfType(org.assertj.core.api.Assertions.assertThatExceptionOfType) AggregationNode(io.trino.sql.planner.plan.AggregationNode) ProjectNode(io.trino.sql.planner.plan.ProjectNode) SECONDS(java.time.temporal.ChronoUnit.SECONDS) GenericRecord(org.apache.avro.generic.GenericRecord) Iterables.getOnlyElement(com.google.common.collect.Iterables.getOnlyElement) PinotHostMapper(io.trino.plugin.pinot.client.PinotHostMapper) SchemaBuilder(org.apache.avro.SchemaBuilder) DOUBLE(io.trino.spi.type.DoubleType.DOUBLE) QueryRunner(io.trino.testing.QueryRunner) JsonCreator(com.fasterxml.jackson.annotation.JsonCreator) VALUE_SERIALIZER_CLASS_CONFIG(org.apache.kafka.clients.producer.ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG) ImmutableList(com.google.common.collect.ImmutableList) Schema(org.apache.avro.Schema) TestingKafka(io.trino.testing.kafka.TestingKafka) ProducerRecord(org.apache.kafka.clients.producer.ProducerRecord) GenericRecordBuilder(org.apache.avro.generic.GenericRecordBuilder) GenericRecord(org.apache.avro.generic.GenericRecord)
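
The helper schemaRegistryAwareProducer(kafka) used above is not shown in this snippet. Based on the serializer-related imports listed (KafkaAvroSerializer, StringSerializer, KEY_SERIALIZER_CLASS_CONFIG, VALUE_SERIALIZER_CLASS_CONFIG, SCHEMA_REGISTRY_URL_CONFIG), a minimal sketch of such a producer-override map might look like the following; the class name and the getSchemaRegistryConnectString() accessor are assumptions for illustration, not the actual Trino helper.

import static io.confluent.kafka.serializers.AbstractKafkaSchemaSerDeConfig.SCHEMA_REGISTRY_URL_CONFIG;
import static org.apache.kafka.clients.producer.ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG;
import static org.apache.kafka.clients.producer.ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG;

import com.google.common.collect.ImmutableMap;
import io.confluent.kafka.serializers.KafkaAvroSerializer;
import io.trino.testing.kafka.TestingKafka;
import java.util.Map;
import org.apache.kafka.common.serialization.StringSerializer;

final class SchemaRegistryProducerConfigSketch
{
    private SchemaRegistryProducerConfigSketch() {}

    // Hypothetical sketch: producer overrides that serialize keys as strings and values as
    // schema-registry-backed Avro, so the GenericRecord payloads above are registered automatically.
    static Map<String, String> schemaRegistryAwareProducer(TestingKafka kafka)
    {
        return ImmutableMap.of(
                KEY_SERIALIZER_CLASS_CONFIG, StringSerializer.class.getName(),
                VALUE_SERIALIZER_CLASS_CONFIG, KafkaAvroSerializer.class.getName(),
                // getSchemaRegistryConnectString() is an assumed TestingKafka accessor
                SCHEMA_REGISTRY_URL_CONFIG, kafka.getSchemaRegistryConnectString());
    }
}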

Example 27 with QueryRunner

use of io.trino.testing.QueryRunner in project trino by trinodb.

the class KuduQueryRunnerFactory method createKuduQueryRunner.

public static QueryRunner createKuduQueryRunner(TestingKuduServer kuduServer, Session session) throws Exception {
    QueryRunner runner = null;
    try {
        runner = DistributedQueryRunner.builder(session).build();
        installKuduConnector(kuduServer.getMasterAddress(), runner, session.getSchema().orElse("kudu_smoke_test"), Optional.of(""));
        return runner;
    } catch (Throwable e) {
        closeAllSuppress(e, runner);
        throw e;
    }
}
Also used : DistributedQueryRunner(io.trino.testing.DistributedQueryRunner) QueryRunner(io.trino.testing.QueryRunner)
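
The Throwable handler above uses closeAllSuppress(e, runner) so that, if connector installation fails, the partially built runner is closed without masking the original exception. A hedged usage sketch (not verbatim Trino code) of how the factory can back a test class's createQueryRunner(), assuming the class extends AbstractTestQueryFramework; the no-argument TestingKuduServer constructor and the "kudu" catalog name are assumptions for illustration.

@Override
protected QueryRunner createQueryRunner() throws Exception
{
    // closeAfterClass comes from AbstractTestQueryFramework and shuts the server down after the tests
    TestingKuduServer kuduServer = closeAfterClass(new TestingKuduServer());
    Session session = testSessionBuilder()
            .setCatalog("kudu")
            .setSchema("kudu_smoke_test")
            .build();
    return KuduQueryRunnerFactory.createKuduQueryRunner(kuduServer, session);
}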

Example 28 with QueryRunner

use of io.trino.testing.QueryRunner in project trino by trinodb.

the class TestMockConnector method createQueryRunner.

@Override
protected QueryRunner createQueryRunner() throws Exception {
    DistributedQueryRunner queryRunner = DistributedQueryRunner.builder(testSessionBuilder().build()).build();
    queryRunner.installPlugin(new TpchPlugin());
    queryRunner.createCatalog("tpch", "tpch");
    queryRunner.installPlugin(new MockConnectorPlugin(MockConnectorFactory.builder()
            .withListSchemaNames(connectionSession -> ImmutableList.of("default"))
            .withGetColumns(schemaTableName -> {
                if (schemaTableName.equals(new SchemaTableName("default", "nation"))) {
                    return TPCH_NATION_SCHEMA;
                }
                return ImmutableList.of(new ColumnMetadata("nationkey", BIGINT));
            })
            .withGetTableHandle((session, tableName) -> {
                if (tableName.equals(new SchemaTableName("default", "new_table"))) {
                    return null;
                }
                return new MockConnectorTableHandle(tableName);
            })
            .withGetMaterializedViewProperties(() -> ImmutableList.of(durationProperty("refresh_interval", "Time interval after which materialized view will be refreshed", null, false)))
            .withGetMaterializedViews((session, schemaTablePrefix) -> ImmutableMap.of(
                    new SchemaTableName("default", "test_materialized_view"),
                    new ConnectorMaterializedViewDefinition(
                            "SELECT nationkey FROM mock.default.test_table",
                            Optional.of(new CatalogSchemaTableName("mock", "default", "test_storage")),
                            Optional.of("mock"),
                            Optional.of("default"),
                            ImmutableList.of(new Column("nationkey", BIGINT.getTypeId())),
                            Optional.empty(),
                            Optional.of("alice"),
                            ImmutableMap.of())))
            .withData(schemaTableName -> {
                if (schemaTableName.equals(new SchemaTableName("default", "nation"))) {
                    return TPCH_NATION_DATA;
                }
                throw new UnsupportedOperationException();
            })
            .withProcedures(ImmutableSet.of(new TestProcedure().get()))
            .withSchemaProperties(() -> ImmutableList.<PropertyMetadata<?>>builder()
                    .add(booleanProperty("boolean_schema_property", "description", false, false))
                    .build())
            .withTableProperties(() -> ImmutableList.<PropertyMetadata<?>>builder()
                    .add(integerProperty("integer_table_property", "description", 0, false))
                    .build())
            .build()));
    queryRunner.createCatalog("mock", "mock");
    return queryRunner;
}
Also used : ColumnMetadata(io.trino.spi.connector.ColumnMetadata) Test(org.testng.annotations.Test) Column(io.trino.spi.connector.ConnectorMaterializedViewDefinition.Column) PropertyMetadataUtil.durationProperty(io.trino.plugin.base.session.PropertyMetadataUtil.durationProperty) ConnectorMaterializedViewDefinition(io.trino.spi.connector.ConnectorMaterializedViewDefinition) AbstractTestQueryFramework(io.trino.testing.AbstractTestQueryFramework) DistributedQueryRunner(io.trino.testing.DistributedQueryRunner) ImmutableList(com.google.common.collect.ImmutableList) MockConnectorFactory(io.trino.connector.MockConnectorFactory) Assertions.assertThatThrownBy(org.assertj.core.api.Assertions.assertThatThrownBy) TestProcedure(io.trino.procedure.TestProcedure) TpchPlugin(io.trino.plugin.tpch.TpchPlugin) MockConnectorPlugin(io.trino.connector.MockConnectorPlugin) ImmutableSet(com.google.common.collect.ImmutableSet) ImmutableMap(com.google.common.collect.ImmutableMap) TPCH_NATION_DATA(io.trino.connector.MockConnectorEntities.TPCH_NATION_DATA) PropertyMetadata.booleanProperty(io.trino.spi.session.PropertyMetadata.booleanProperty) PropertyMetadata(io.trino.spi.session.PropertyMetadata) TPCH_NATION_SCHEMA(io.trino.connector.MockConnectorEntities.TPCH_NATION_SCHEMA) SchemaTableName(io.trino.spi.connector.SchemaTableName) MockConnectorTableHandle(io.trino.connector.MockConnectorTableHandle) TestingSession.testSessionBuilder(io.trino.testing.TestingSession.testSessionBuilder) BIGINT(io.trino.spi.type.BigintType.BIGINT) CatalogSchemaTableName(io.trino.spi.connector.CatalogSchemaTableName) QueryRunner(io.trino.testing.QueryRunner) PropertyMetadata.integerProperty(io.trino.spi.session.PropertyMetadata.integerProperty) Optional(java.util.Optional) Session(io.trino.Session) DistributedQueryRunner(io.trino.testing.DistributedQueryRunner) ColumnMetadata(io.trino.spi.connector.ColumnMetadata) TestProcedure(io.trino.procedure.TestProcedure) TpchPlugin(io.trino.plugin.tpch.TpchPlugin) ConnectorMaterializedViewDefinition(io.trino.spi.connector.ConnectorMaterializedViewDefinition) MockConnectorPlugin(io.trino.connector.MockConnectorPlugin) SchemaTableName(io.trino.spi.connector.SchemaTableName) CatalogSchemaTableName(io.trino.spi.connector.CatalogSchemaTableName) CatalogSchemaTableName(io.trino.spi.connector.CatalogSchemaTableName) Column(io.trino.spi.connector.ConnectorMaterializedViewDefinition.Column) MockConnectorTableHandle(io.trino.connector.MockConnectorTableHandle) PropertyMetadata(io.trino.spi.session.PropertyMetadata)
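
Once the mock catalog is registered, tests built on this runner can query it like any other catalog. A hedged sketch of the kind of assertions this setup supports (illustrative, not copied from TestMockConnector):

@Test
public void testMockNation()
{
    // default.nation is backed by TPCH_NATION_SCHEMA / TPCH_NATION_DATA, i.e. the 25-row tpch nation table
    assertQuery("SELECT count(*) FROM mock.default.nation", "SELECT 25");
    // withListSchemaNames(...) exposes only "default"; information_schema is added by the engine
    assertThat(computeActual("SHOW SCHEMAS FROM mock").getOnlyColumnAsSet()).contains("default");
}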

Example 29 with QueryRunner

use of io.trino.testing.QueryRunner in project trino by trinodb.

the class TestJoinQueries method testBroadcastJoinDeadlockResolution.

/**
 * This test verifies if a broadcast deadlock is getting properly resolved.
 * <p>
 * A deadlock can happen when the build side of a join overflows the total capacity of the broadcast output buffer.
 * When the broadcast buffer overflows, some data must be discarded. The data in the broadcast output buffer can
 * only be discarded after it has been consumed by all consumers. The scheduler is expected to send the "noMoreOutputBuffers"
 * signal when probe side scheduling is done. However, if probe side scheduling is blocked on split placement,
 * scheduling might never finish. To handle this case, special handling was introduced: when the scheduler detects
 * that the stage is blocked on split placement and the output buffers of the stage's source tasks are full, the
 * scheduler schedules as many tasks as there are nodes in the cluster (without waiting for split placement to finish)
 * and signals the source tasks that no more tasks (and thus output buffers) will be created.
 * <p>
 * Note: The test is expected to take ~25 seconds. The increased run time is caused by the decreased split queue size and the
 * decreased size of the broadcast output buffer.
 */
@Test(timeOut = 120_000)
public void testBroadcastJoinDeadlockResolution() throws Exception {
    try (QueryRunner queryRunner = TpchQueryRunnerBuilder.builder()
            .setCoordinatorProperties(ImmutableMap.of(
                    "join-distribution-type", "BROADCAST",
                    "optimizer.join-reordering-strategy", "NONE",
                    // make sure the probe side will get blocked on a split placement
                    "node-scheduler.max-pending-splits-per-task", "1",
                    "node-scheduler.max-splits-per-node", "1",
                    "node-scheduler.max-unacknowledged-splits-per-task", "1"))
            .setExtraProperties(ImmutableMap.of(
                    // make sure the build side will get blocked on a broadcast buffer
                    "sink.max-broadcast-buffer-size", "1kB"))
            .withSplitsPerNode(10)
            .build()) {
        String sql = "SELECT * FROM supplier s INNER JOIN lineitem l ON s.suppkey = l.suppkey";
        MaterializedResult actual = queryRunner.execute(sql);
        MaterializedResult expected = getQueryRunner().execute(sql);
        assertEqualsIgnoreOrder(actual, expected, "For query: \n " + sql);
    }
}
Also used : MaterializedResult(io.trino.testing.MaterializedResult) QueryRunner(io.trino.testing.QueryRunner) Test(org.testng.annotations.Test)
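
The node-scheduler and sink buffer settings above are config properties and cannot be changed per session, which is why the test builds a dedicated runner. For forcing only the broadcast join shape (without reproducing the deadlock), the corresponding session properties can be set on an existing runner; a hedged sketch, assuming the standard join_distribution_type and join_reordering_strategy session properties:

Session broadcastSession = Session.builder(getQueryRunner().getDefaultSession())
        .setSystemProperty("join_distribution_type", "BROADCAST")
        .setSystemProperty("join_reordering_strategy", "NONE")
        .build();
MaterializedResult broadcastResult = getQueryRunner().execute(
        broadcastSession,
        "SELECT * FROM supplier s INNER JOIN lineitem l ON s.suppkey = l.suppkey");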

Aggregations

QueryRunner (io.trino.testing.QueryRunner): 29
DistributedQueryRunner (io.trino.testing.DistributedQueryRunner): 17
ImmutableMap (com.google.common.collect.ImmutableMap): 13
Test (org.testng.annotations.Test): 13
Session (io.trino.Session): 12
ImmutableList (com.google.common.collect.ImmutableList): 11
Optional (java.util.Optional): 11
TestingSession.testSessionBuilder (io.trino.testing.TestingSession.testSessionBuilder): 10
SchemaTableName (io.trino.spi.connector.SchemaTableName): 9
AbstractTestQueryFramework (io.trino.testing.AbstractTestQueryFramework): 9
List (java.util.List): 9
Assertions.assertThatThrownBy (org.assertj.core.api.Assertions.assertThatThrownBy): 8
ImmutableSet (com.google.common.collect.ImmutableSet): 7
MockConnectorFactory (io.trino.connector.MockConnectorFactory): 7
TpchPlugin (io.trino.plugin.tpch.TpchPlugin): 7
MaterializedResult (io.trino.testing.MaterializedResult): 7
String.format (java.lang.String.format): 7
Map (java.util.Map): 6
AfterClass (org.testng.annotations.AfterClass): 6
Plugin (io.trino.spi.Plugin): 5