Search in sources :

Example 1 with QueryRunner

use of io.trino.testing.QueryRunner in project trino by trinodb.

the class TestDeltaLakeAdlsConnectorSmokeTest method createTableFromResources.

/**
 * Uploads every classpath resource whose name starts with {@code resourcePath + "/"} through
 * {@code azureContainerClient}, placing it under {@code bucketName + "/" + table}, and then
 * creates a table over the location reported by {@code getLocationForTable(bucketName, table)}.
 *
 * @param table name of the table to create; also the last segment of the upload target directory
 * @param resourcePath classpath prefix of the resources to upload (without trailing slash)
 * @param queryRunner runner used to execute the {@code CREATE TABLE} statement
 * @throws UncheckedIOException if scanning the classpath or reading a resource fails
 */
@Override
void createTableFromResources(String table, String resourcePath, QueryRunner queryRunner) {
    String targetDirectory = bucketName + "/" + table;
    try {
        List<ClassPath.ResourceInfo> resources = ClassPath.from(TestDeltaLakeAdlsConnectorSmokeTest.class.getClassLoader())
                .getResources().stream()
                .filter(resourceInfo -> resourceInfo.getResourceName().startsWith(resourcePath + "/"))
                .collect(toImmutableList());
        for (ClassPath.ResourceInfo resourceInfo : resources) {
            // Swap the leading resource prefix for the target directory; both sides are quoted so that
            // regex metacharacters in either path are treated literally.
            String fileName = resourceInfo.getResourceName().replaceFirst("^" + Pattern.quote(resourcePath), quoteReplacement(targetDirectory));
            ByteSource byteSource = resourceInfo.asByteSource();
            // The caller owns the stream opened from the ByteSource, so close it explicitly once the
            // upload returns (or fails) instead of leaking one open stream per resource.
            try (var inputStream = byteSource.openBufferedStream()) {
                azureContainerClient.getBlobClient(fileName).upload(inputStream, byteSource.size());
            }
        }
    } catch (IOException e) {
        throw new UncheckedIOException(e);
    }
    queryRunner.execute(format("CREATE TABLE %s (dummy int) WITH (location = '%s')", table, getLocationForTable(bucketName, table)));
}
Also used : DELTA_CATALOG(io.trino.plugin.deltalake.DeltaLakeQueryRunner.DELTA_CATALOG) Assertions.assertThat(org.assertj.core.api.Assertions.assertThat) Test(org.testng.annotations.Test) PosixFilePermissions(java.nio.file.attribute.PosixFilePermissions) BlobClient(com.azure.storage.blob.BlobClient) Duration(java.time.Duration) Map(java.util.Map) Objects.requireNonNull(java.util.Objects.requireNonNull) ImmutableSet.toImmutableSet(com.google.common.collect.ImmutableSet.toImmutableSet) ClassPath(com.google.common.reflect.ClassPath) ByteSource(com.google.common.io.ByteSource) Path(java.nio.file.Path) BlobServiceClientBuilder(com.azure.storage.blob.BlobServiceClientBuilder) BlobItem(com.azure.storage.blob.models.BlobItem) ListBlobsOptions(com.azure.storage.blob.models.ListBlobsOptions) AfterClass(org.testng.annotations.AfterClass) PosixFilePermission(java.nio.file.attribute.PosixFilePermission) BlobContainerClient(com.azure.storage.blob.BlobContainerClient) ImmutableMap(com.google.common.collect.ImmutableMap) Resources(com.google.common.io.Resources) Files(java.nio.file.Files) UTF_8(java.nio.charset.StandardCharsets.UTF_8) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) DockerizedDataLake(io.trino.plugin.deltalake.util.DockerizedDataLake) Set(java.util.Set) Matcher.quoteReplacement(java.util.regex.Matcher.quoteReplacement) IOException(java.io.IOException) FileAttribute(java.nio.file.attribute.FileAttribute) String.format(java.lang.String.format) DeltaLakeQueryRunner.createAbfsDeltaLakeQueryRunner(io.trino.plugin.deltalake.DeltaLakeQueryRunner.createAbfsDeltaLakeQueryRunner) UncheckedIOException(java.io.UncheckedIOException) BlobServiceClient(com.azure.storage.blob.BlobServiceClient) List(java.util.List) Stream(java.util.stream.Stream) TestingHadoop(io.trino.plugin.deltalake.util.TestingHadoop) QueryRunner(io.trino.testing.QueryRunner) Parameters(org.testng.annotations.Parameters) Optional(java.util.Optional) 
Pattern(java.util.regex.Pattern) ClassPath(com.google.common.reflect.ClassPath) ByteSource(com.google.common.io.ByteSource) UncheckedIOException(java.io.UncheckedIOException) IOException(java.io.IOException) UncheckedIOException(java.io.UncheckedIOException)

Example 2 with QueryRunner

use of io.trino.testing.QueryRunner in project trino by trinodb.

the class TestHivePlans method setUp.

/**
 * Creates the four tables used by this class via the query runner: one partitioned on an integer
 * column, one partitioned on a varchar column, one integer-partitioned table with an additional
 * partition value, and one unpartitioned table.
 */
@BeforeClass
public void setUp() {
    QueryRunner runner = getQueryRunner();

    // Every table is fed from the same VALUES rows so that the column types match and no coercions appear.
    String rows = "VALUES ('one', 1), ('two', 2), ('three', 3), ('four', 4), ('five', 5)";

    // partitioned on integer
    String intPartitioned = "CREATE TABLE table_int_partitioned WITH (partitioned_by = ARRAY['int_part']) AS SELECT str_col, int_part FROM ("
            + rows
            + ") t(str_col, int_part)";

    // partitioned on varchar
    String strPartitioned = "CREATE TABLE table_str_partitioned WITH (partitioned_by = ARRAY['str_part']) AS SELECT int_col, str_part FROM ("
            + rows
            + ") t(str_part, int_col)";

    // with too many partitions (one extra row, hence one extra partition value)
    String tooManyPartitions = "CREATE TABLE table_int_with_too_many_partitions WITH (partitioned_by = ARRAY['int_part']) AS SELECT str_col, int_part FROM ("
            + rows
            + ", ('six', 6)) t(str_col, int_part)";

    // unpartitioned
    String unpartitioned = "CREATE TABLE table_unpartitioned AS SELECT str_col, int_col FROM ("
            + rows
            + ") t(str_col, int_col)";

    String[] statements = {intPartitioned, strPartitioned, tooManyPartitions, unpartitioned};
    for (String statement : statements) {
        runner.execute(statement);
    }
}
Also used : LocalQueryRunner(io.trino.testing.LocalQueryRunner) QueryRunner(io.trino.testing.QueryRunner) BeforeClass(org.testng.annotations.BeforeClass)

Example 3 with QueryRunner

use of io.trino.testing.QueryRunner in project trino by trinodb.

the class BaseDeltaLakeConnectorSmokeTest method testDeltaLakeTableLocationChanged.

/**
 * Checks that the main query runner eventually picks up a table that was dropped and re-created
 * at a different location (and with an extra column) by a separate query runner.
 *
 * <p>Flow: create and populate the table at an initial location, replace it through an independent
 * query runner, then poll with the main runner until it returns the new data, and finally compare
 * the {@code SHOW CREATE TABLE} output against the new definition.
 *
 * @param fewerEntries when true, the re-created table receives a single INSERT instead of four
 * @param firstPartitioned whether the initial table is created with {@code partitioned_by = ARRAY['a_number']}
 * @param secondPartitioned whether the re-created table is created with {@code partitioned_by = ARRAY['a_number']}
 * @throws Exception if any step fails, including a {@link RuntimeException} when the new data is not
 *         visible within {@code TEST_METADATA_CACHE_TTL_SECONDS + 10} seconds
 */
private void testDeltaLakeTableLocationChanged(boolean fewerEntries, boolean firstPartitioned, boolean secondPartitioned) throws Exception {
    // Create a table with a bunch of transaction log entries
    String tableName = "test_table_location_changed_" + randomTableSuffix();
    String initialLocation = getLocationForTable(bucketName, tableName);
    assertUpdate(format("CREATE TABLE %s (a_number int, a_string varchar) WITH (location = '%s' %s)", tableName, initialLocation, firstPartitioned ? ", partitioned_by = ARRAY['a_number']" : ""));
    // Issues four separate single-row INSERT statements against the given runner; the prefix is
    // embedded in a_string so rows written by different runners can be told apart.
    BiConsumer<QueryRunner, String> insertABunchOfRows = (queryRunner, prefix) -> {
        queryRunner.execute(format("INSERT INTO %s (a_number, a_string) VALUES (1, '%s one')", tableName, prefix));
        queryRunner.execute(format("INSERT INTO %s (a_number, a_string) VALUES (2, '%s two')", tableName, prefix));
        queryRunner.execute(format("INSERT INTO %s (a_number, a_string) VALUES (3, '%s tree')", tableName, prefix));
        queryRunner.execute(format("INSERT INTO %s (a_number, a_string) VALUES (4, '%s four')", tableName, prefix));
    };
    insertABunchOfRows.accept(getQueryRunner(), "first");
    // Snapshot of what the main runner sees before the table is replaced
    MaterializedResult initialData = computeActual("SELECT * FROM " + tableName);
    assertThat(initialData.getMaterializedRows()).hasSize(4);
    MaterializedResult expectedDataAfterChange;
    String newLocation;
    // The independent runner is closed as soon as the replacement table has been written and read back
    try (QueryRunner independentQueryRunner = createDeltaLakeQueryRunner(Map.of())) {
        // Change table's location without main Delta Lake connector (main query runner) knowing about this
        newLocation = getLocationForTable(bucketName, "test_table_location_changed_new_" + randomTableSuffix());
        independentQueryRunner.execute("DROP TABLE " + tableName);
        independentQueryRunner.execute(format("CREATE TABLE %s (a_number int, a_string varchar, another_string varchar) WITH (location = '%s' %s) ", tableName, newLocation, secondPartitioned ? ", partitioned_by = ARRAY['a_number']" : ""));
        if (fewerEntries) {
            // Have fewer transaction log entries so that version mismatch is more apparent (but easier to detect)
            independentQueryRunner.execute(format("INSERT INTO %s VALUES (1, 'second one', 'third column')", tableName));
        } else {
            insertABunchOfRows.accept(independentQueryRunner, "second");
        }
        expectedDataAfterChange = independentQueryRunner.execute("SELECT * FROM " + tableName);
        assertThat(expectedDataAfterChange.getMaterializedRows()).hasSize(fewerEntries ? 1 : 4);
    }
    // Poll the main runner once per second until it returns the replacement table's rows.
    Stopwatch stopwatch = Stopwatch.createStarted();
    while (true) {
        MaterializedResult currentVisibleData = computeActual("SELECT * FROM " + tableName);
        // Rows are compared as sets, so row order is ignored
        if (Set.copyOf(currentVisibleData.getMaterializedRows()).equals(Set.copyOf(expectedDataAfterChange.getMaterializedRows()))) {
            // satisfied
            break;
        }
        // Until the change is visible, only the original rows are acceptable; anything else is a failure
        if (!Set.copyOf(currentVisibleData.getMaterializedRows()).equals(Set.copyOf(initialData.getMaterializedRows()))) {
            throw new AssertionError(format("Unexpected result when reading table: %s,\n expected either initialData: %s\n or expectedDataAfterChange: %s", currentVisibleData, initialData, expectedDataAfterChange));
        }
        // Give up 10 seconds after TEST_METADATA_CACHE_TTL_SECONDS has elapsed
        if (stopwatch.elapsed(SECONDS) > TEST_METADATA_CACHE_TTL_SECONDS + 10) {
            throw new RuntimeException("Timed out waiting on table to reflect new data from new location");
        }
        SECONDS.sleep(1);
    }
    // Verify table schema gets reflected correctly
    // (when secondPartitioned is false the expected text contains an empty ARRAY[] for partitioned_by)
    assertThat(computeScalar("SHOW CREATE TABLE " + tableName)).isEqualTo(format("" + "CREATE TABLE %s.%s.%s (\n" + "   a_number integer,\n" + "   a_string varchar,\n" + "   another_string varchar\n" + ")\n" + "WITH (\n" + "   location = '%s',\n" + "   partitioned_by = ARRAY[%s]\n" + ")", getSession().getCatalog().orElseThrow(), getSession().getSchema().orElseThrow(), tableName, newLocation, secondPartitioned ? "'a_number'" : ""));
}
Also used : QueryId(io.trino.spi.QueryId) QueryManager(io.trino.execution.QueryManager) MaterializedResult(io.trino.testing.MaterializedResult) Assertions.assertThat(org.assertj.core.api.Assertions.assertThat) Test(org.testng.annotations.Test) MoreCollectors.onlyElement(com.google.common.collect.MoreCollectors.onlyElement) Duration(io.airlift.units.Duration) CUSTOMER(io.trino.tpch.TpchTable.CUSTOMER) DistributedQueryRunner(io.trino.testing.DistributedQueryRunner) Map(java.util.Map) Sets.union(com.google.common.collect.Sets.union) TestingConnectorBehavior(io.trino.testing.TestingConnectorBehavior) Assert.assertFalse(org.testng.Assert.assertFalse) Assert.assertEquals(io.trino.testing.assertions.Assert.assertEquals) TestTable.randomTableSuffix(io.trino.testing.sql.TestTable.randomTableSuffix) TpchTable(io.trino.tpch.TpchTable) TRANSACTION_LOG_DIRECTORY(io.trino.plugin.deltalake.transactionlog.TransactionLogUtil.TRANSACTION_LOG_DIRECTORY) ImmutableSet(com.google.common.collect.ImmutableSet) ImmutableMap(com.google.common.collect.ImmutableMap) Set(java.util.Set) MILLISECONDS(java.util.concurrent.TimeUnit.MILLISECONDS) String.format(java.lang.String.format) ENABLE_DYNAMIC_FILTERING(io.trino.SystemSessionProperties.ENABLE_DYNAMIC_FILTERING) DataSize(io.airlift.units.DataSize) List(java.util.List) TestingSession.testSessionBuilder(io.trino.testing.TestingSession.testSessionBuilder) Optional(java.util.Optional) BaseConnectorSmokeTest(io.trino.testing.BaseConnectorSmokeTest) LINE_ITEM(io.trino.tpch.TpchTable.LINE_ITEM) Session(io.trino.Session) DELTA_CATALOG(io.trino.plugin.deltalake.DeltaLakeQueryRunner.DELTA_CATALOG) DataProvider(org.testng.annotations.DataProvider) Stopwatch(com.google.common.base.Stopwatch) ORDERS(io.trino.tpch.TpchTable.ORDERS) ImmutableList(com.google.common.collect.ImmutableList) Assertions.assertThatThrownBy(org.assertj.core.api.Assertions.assertThatThrownBy) BiConsumer(java.util.function.BiConsumer) 
ImmutableSet.toImmutableSet(com.google.common.collect.ImmutableSet.toImmutableSet) OperatorStats(io.trino.operator.OperatorStats) Language(org.intellij.lang.annotations.Language) DockerizedDataLake(io.trino.plugin.deltalake.util.DockerizedDataLake) ResultWithQueryId(io.trino.testing.ResultWithQueryId) JoinDistributionType(io.trino.sql.planner.OptimizerConfig.JoinDistributionType) Assert.assertEventually(io.trino.testing.assertions.Assert.assertEventually) JOIN_DISTRIBUTION_TYPE(io.trino.SystemSessionProperties.JOIN_DISTRIBUTION_TYPE) QueryRunner(io.trino.testing.QueryRunner) TestingHivePlugin(io.trino.plugin.hive.TestingHivePlugin) Assert.assertTrue(org.testng.Assert.assertTrue) SECONDS(java.util.concurrent.TimeUnit.SECONDS) Stopwatch(com.google.common.base.Stopwatch) MaterializedResult(io.trino.testing.MaterializedResult) DistributedQueryRunner(io.trino.testing.DistributedQueryRunner) QueryRunner(io.trino.testing.QueryRunner)

Example 4 with QueryRunner

use of io.trino.testing.QueryRunner in project trino by trinodb.

the class BaseDeltaLakeConnectorSmokeTest method createQueryRunner.

/**
 * Starts the dockerized data lake, builds a Delta Lake query runner whose metadata and metastore
 * cache TTLs are both {@code TEST_METADATA_CACHE_TTL_SECONDS} seconds, creates the test schema,
 * copies the required TPCH tables from {@code tpch.tiny}, and registers the non-TPCH tables from
 * the {@code databricks/} classpath resources.
 *
 * @return the configured query runner
 * @throws Exception if any part of the setup fails
 */
@Override
protected QueryRunner createQueryRunner() throws Exception {
    this.dockerizedDataLake = closeAfterClass(createDockerizedDataLake());

    // Both caches share the same TTL value
    String cacheTtl = TEST_METADATA_CACHE_TTL_SECONDS + "s";
    ImmutableMap<String, String> connectorProperties = ImmutableMap.<String, String>builder()
            .put("delta.metadata.cache-ttl", cacheTtl)
            .put("hive.metastore-cache-ttl", cacheTtl)
            .buildOrThrow();
    QueryRunner queryRunner = createDeltaLakeQueryRunner(connectorProperties);

    String schemaLocation = getLocationForTable(bucketName, SCHEMA);
    queryRunner.execute(format("CREATE SCHEMA %s WITH (location = '%s')", SCHEMA, schemaLocation));

    // Each required TPCH table is materialized from the table of the same name in tpch.tiny
    REQUIRED_TPCH_TABLES.forEach(table -> {
        String tableName = table.getTableName();
        String tableLocation = getLocationForTable(bucketName, tableName);
        queryRunner.execute(format("CREATE TABLE %s WITH (location = '%s') AS SELECT * FROM tpch.tiny.%1$s", tableName, tableLocation));
    });

    /*
     * Origin of the non-TPCH resource data.
     *
     * Data (across 2 files) generated using:
     * INSERT INTO foo VALUES
     *   (1, 100, 'data1'),
     *   (2, 200, 'data2')
     *
     * Data (across 2 files) generated using:
     * INSERT INTO bar VALUES
     *   (100, 'data100'),
     *   (200, 'data200')
     *
     * INSERT INTO old_dates
     * VALUES (DATE '0100-01-01', 1), (DATE '1582-10-15', 2), (DATE '1960-01-01', 3), (DATE '2020-01-01', 4)
     *
     * INSERT INTO test_timestamps VALUES
     * (TIMESTAMP '0100-01-01 01:02:03', 1), (TIMESTAMP '1582-10-15 01:02:03', 2), (TIMESTAMP '1960-01-01 01:02:03', 3), (TIMESTAMP '2020-01-01 01:02:03', 4);
     */
    NON_TPCH_TABLES.forEach(table -> createTableFromResources(table, "databricks/" + table, queryRunner));

    return queryRunner;
}
Also used : DistributedQueryRunner(io.trino.testing.DistributedQueryRunner) QueryRunner(io.trino.testing.QueryRunner)

Example 5 with QueryRunner

use of io.trino.testing.QueryRunner in project trino by trinodb.

the class BaseDeltaLakeMinioConnectorTest method createQueryRunner.

/**
 * Starts the dockerized Minio data lake for {@code bucketName}, builds an S3-backed Delta Lake
 * query runner with non-concurrent writes enabled, creates the test schema, and for every TPCH
 * table copies its resources into the bucket and registers a table over that location.
 *
 * @return the configured query runner
 * @throws Exception if any part of the setup fails
 */
@Override
protected QueryRunner createQueryRunner() throws Exception {
    this.dockerizedMinioDataLake = closeAfterClass(createDockerizedMinioDataLakeForDeltaLake(bucketName, Optional.empty()));

    ImmutableMap<String, String> connectorProperties = ImmutableMap.<String, String>builder()
            .put("delta.enable-non-concurrent-writes", "true")
            .buildOrThrow();
    QueryRunner queryRunner = DeltaLakeQueryRunner.createS3DeltaLakeQueryRunner(
            DELTA_CATALOG,
            SCHEMA,
            connectorProperties,
            dockerizedMinioDataLake.getMinioAddress(),
            dockerizedMinioDataLake.getTestingHadoop());

    String schemaLocation = "s3://" + bucketName + "/" + SCHEMA;
    queryRunner.execute("CREATE SCHEMA " + SCHEMA + " WITH (location = '" + schemaLocation + "')");

    for (TpchTable<?> table : TpchTable.getTables()) {
        String tableName = table.getTableName();
        // Copy the table's resources into the bucket first, then register a table over that location
        dockerizedMinioDataLake.copyResources(resourcePath + tableName, SCHEMA + "/" + tableName);
        queryRunner.execute(format("CREATE TABLE %1$s.%2$s.%3$s (dummy int) WITH (location = 's3://%4$s/%2$s/%3$s')", DELTA_CATALOG, SCHEMA, tableName, bucketName));
    }
    return queryRunner;
}
Also used : QueryRunner(io.trino.testing.QueryRunner)

Aggregations

QueryRunner (io.trino.testing.QueryRunner): 29; DistributedQueryRunner (io.trino.testing.DistributedQueryRunner): 17; ImmutableMap (com.google.common.collect.ImmutableMap): 13; Test (org.testng.annotations.Test): 13; Session (io.trino.Session): 12; ImmutableList (com.google.common.collect.ImmutableList): 11; Optional (java.util.Optional): 11; TestingSession.testSessionBuilder (io.trino.testing.TestingSession.testSessionBuilder): 10; SchemaTableName (io.trino.spi.connector.SchemaTableName): 9; AbstractTestQueryFramework (io.trino.testing.AbstractTestQueryFramework): 9; List (java.util.List): 9; Assertions.assertThatThrownBy (org.assertj.core.api.Assertions.assertThatThrownBy): 8; ImmutableSet (com.google.common.collect.ImmutableSet): 7; MockConnectorFactory (io.trino.connector.MockConnectorFactory): 7; TpchPlugin (io.trino.plugin.tpch.TpchPlugin): 7; MaterializedResult (io.trino.testing.MaterializedResult): 7; String.format (java.lang.String.format): 7; Map (java.util.Map): 6; AfterClass (org.testng.annotations.AfterClass): 6; Plugin (io.trino.spi.Plugin): 5