Search in sources:

Example 11 with Row

use of io.trino.tempto.assertions.QueryAssert.Row in project trino by trinodb.

From the class TestIcebergSparkCompatibility, method testMigratedDataWithAlteredSchema:

@Test(groups = { ICEBERG, PROFILE_SPECIFIC_TESTS }, dataProvider = "storageFormats")
public void testMigratedDataWithAlteredSchema(StorageFormat storageFormat) {
    String baseTableName = "test_migrated_data_with_altered_schema_" + randomTableSuffix();
    String defaultCatalogTableName = sparkDefaultCatalogTableName(baseTableName);
    // Create a plain (non-Iceberg) Spark table with a nested struct column and load one row.
    onSpark().executeQuery(format(
            "CREATE TABLE %s (\n"
                    + "  doc_id STRING\n"
                    + ", nested_struct STRUCT<id:INT, name:STRING, address:STRUCT<a:INT, b:STRING>>)\n"
                    + " USING %s",
            defaultCatalogTableName,
            storageFormat.name()));
    onSpark().executeQuery(format(
            "INSERT INTO TABLE %s SELECT"
                    + "  'Doc213'"
                    + ", named_struct('id', 1, 'name', 'P. Sherman', 'address', named_struct('a', 42, 'b', 'Wallaby Way'))",
            defaultCatalogTableName));
    // Migrate the table to Iceberg, then rename the struct column; reads of the pre-migration
    // data files now depend on the name mapping recorded during migration.
    onSpark().executeQuery(format("CALL system.migrate('%s')", defaultCatalogTableName));
    String sparkTableName = sparkTableName(baseTableName);
    onSpark().executeQuery("ALTER TABLE " + sparkTableName + " RENAME COLUMN nested_struct TO nested_struct_moved");
    String select = "SELECT"
            + " nested_struct_moved.name"
            + ", nested_struct_moved.address.a"
            + ", nested_struct_moved.address.b"
            + "  FROM ";
    Row expected = row("P. Sherman", 42, "Wallaby Way");
    // Both engines should resolve the renamed column through the name mapping.
    assertThat(onSpark().executeQuery(select + sparkTableName)).containsOnly(ImmutableList.of(expected));
    String trinoTableName = trinoTableName(baseTableName);
    assertThat(onTrino().executeQuery(select + trinoTableName)).containsOnly(ImmutableList.of(expected));
    // After removing the name mapping, columns from migrated files should be null since they are missing the Iceberg Field IDs
    onSpark().executeQuery(format("ALTER TABLE %s UNSET TBLPROPERTIES ('schema.name-mapping.default')", sparkTableName));
    assertThat(onTrino().executeQuery(select + trinoTableName)).containsOnly(row(null, null, null));
}
Also used : QueryResult(io.trino.tempto.query.QueryResult) Row(io.trino.tempto.assertions.QueryAssert.Row) Test(org.testng.annotations.Test) ProductTest(io.trino.tempto.ProductTest)

Example 12 with Row

use of io.trino.tempto.assertions.QueryAssert.Row in project trino by trinodb.

From the class TestIcebergSparkCompatibility, method testTrinoWritingDataWithObjectStorageLocationProvider:

@Test(groups = { ICEBERG, PROFILE_SPECIFIC_TESTS }, dataProvider = "storageFormatsWithSpecVersion")
public void testTrinoWritingDataWithObjectStorageLocationProvider(StorageFormat storageFormat, int specVersion) {
    // Include the spec version in the table name: the data provider yields the same storage
    // format with several spec versions, and reusing one name across invocations means a table
    // left behind by a mid-test failure breaks the next invocation's CREATE TABLE.
    String baseTableName = "test_object_storage_location_provider_" + storageFormat + "_" + specVersion;
    String sparkTableName = sparkTableName(baseTableName);
    String trinoTableName = trinoTableName(baseTableName);
    String dataPath = "hdfs://hadoop-master:9000/user/hive/warehouse/test_object_storage_location_provider/obj-data";
    // Spark creates the table with the object-storage location provider, redirecting data files to dataPath.
    onSpark().executeQuery(format("CREATE TABLE %s (_string STRING, _bigint BIGINT) USING ICEBERG TBLPROPERTIES (" + "'write.object-storage.enabled'=true," + "'write.object-storage.path'='%s'," + "'write.format.default' = '%s'," + "'format-version' = %s)", sparkTableName, dataPath, storageFormat, specVersion));
    onTrino().executeQuery(format("INSERT INTO %s VALUES ('a_string', 1000000000000000)", trinoTableName));
    Row result = row("a_string", 1000000000000000L);
    // The Trino-written row must be readable from both engines.
    assertThat(onSpark().executeQuery(format("SELECT _string, _bigint FROM %s", sparkTableName))).containsOnly(result);
    assertThat(onTrino().executeQuery(format("SELECT _string, _bigint FROM %s", trinoTableName))).containsOnly(result);
    // The $files metadata table must show the data file under the overridden object-storage path.
    QueryResult queryResult = onTrino().executeQuery(format("SELECT file_path FROM %s", trinoTableName("\"" + baseTableName + "$files\"")));
    assertThat(queryResult).hasRowsCount(1).hasColumnsCount(1);
    assertTrue(((String) queryResult.row(0).get(0)).contains(dataPath));
    // TODO: support path override in Iceberg table creation: https://github.com/trinodb/trino/issues/8861
    assertQueryFailure(() -> onTrino().executeQuery("DROP TABLE " + trinoTableName)).hasMessageContaining("contains Iceberg path override properties and cannot be dropped from Trino");
    onSpark().executeQuery("DROP TABLE " + sparkTableName);
}
Also used : QueryResult(io.trino.tempto.query.QueryResult) Row(io.trino.tempto.assertions.QueryAssert.Row) Test(org.testng.annotations.Test) ProductTest(io.trino.tempto.ProductTest)

Example 13 with Row

use of io.trino.tempto.assertions.QueryAssert.Row in project trino by trinodb.

From the class TestIcebergSparkCompatibility, method testSparkReadsTrinoPartitionedTable:

@Test(groups = { ICEBERG, PROFILE_SPECIFIC_TESTS }, dataProvider = "storageFormats")
public void testSparkReadsTrinoPartitionedTable(StorageFormat storageFormat) {
    String baseTableName = "test_spark_reads_trino_partitioned_table_" + storageFormat;
    String trinoTableName = trinoTableName(baseTableName);
    String sparkTableName = sparkTableName(baseTableName);
    // Recreate the table, partitioned on both the varchar and the varbinary column, and load three rows.
    onTrino().executeQuery("DROP TABLE IF EXISTS " + trinoTableName);
    onTrino().executeQuery(format("CREATE TABLE %s (_string VARCHAR, _varbinary VARBINARY, _bigint BIGINT) WITH (partitioning = ARRAY['_string', '_varbinary'], format = '%s')", trinoTableName, storageFormat));
    onTrino().executeQuery(format("INSERT INTO %s VALUES ('a', X'0ff102f0feff', 1001), ('b', X'0ff102f0fefe', 1002), ('c', X'0ff102fdfeff', 1003)", trinoTableName));
    // Filtering on the varchar partition column works from both engines.
    String selectByString = "SELECT * FROM %s WHERE _string = 'b'";
    Row expectedByString = row("b", new byte[] { 15, -15, 2, -16, -2, -2 }, 1002);
    assertThat(onTrino().executeQuery(format(selectByString, trinoTableName))).containsOnly(expectedByString);
    assertThat(onSpark().executeQuery(format(selectByString, sparkTableName))).containsOnly(expectedByString);
    // Filtering on the varbinary partition column works from Trino only.
    String selectByVarbinary = "SELECT * FROM %s WHERE _varbinary = X'0ff102f0feff'";
    Row expectedByVarbinary = row("a", new byte[] { 15, -15, 2, -16, -2, -1 }, 1001);
    assertThat(onTrino().executeQuery(format(selectByVarbinary, trinoTableName))).containsOnly(expectedByVarbinary);
    // for now this fails on spark see https://github.com/apache/iceberg/issues/2934
    assertQueryFailure(() -> onSpark().executeQuery(format(selectByVarbinary, sparkTableName))).hasMessageContaining("Cannot convert bytes to SQL literal: java.nio.HeapByteBuffer[pos=0 lim=6 cap=6]");
    onTrino().executeQuery("DROP TABLE " + trinoTableName);
}
Also used : Row(io.trino.tempto.assertions.QueryAssert.Row) Test(org.testng.annotations.Test) ProductTest(io.trino.tempto.ProductTest)

Example 14 with Row

use of io.trino.tempto.assertions.QueryAssert.Row in project trino by trinodb.

From the class TestIcebergSparkCompatibility, method testTrinoReadingNestedSparkData:

@Test(groups = { ICEBERG, PROFILE_SPECIFIC_TESTS }, dataProvider = "storageFormatsWithSpecVersion")
public void testTrinoReadingNestedSparkData(StorageFormat storageFormat, int specVersion) {
    String baseTableName = "test_trino_reading_nested_spark_data_" + storageFormat;
    String trinoTableName = trinoTableName(baseTableName);
    String sparkTableName = sparkTableName(baseTableName);
    // Iceberg table with deeply nested types written by Spark:
    // map-of-array-of-struct, array-of-map, and struct containing an array-of-map.
    onSpark().executeQuery(format("CREATE TABLE %s (\n" + "  doc_id STRING\n" + ", nested_map MAP<STRING, ARRAY<STRUCT<sname: STRING, snumber: INT>>>\n" + ", nested_array ARRAY<MAP<STRING, ARRAY<STRUCT<mname: STRING, mnumber: INT>>>>\n" + ", nested_struct STRUCT<name:STRING, complicated: ARRAY<MAP<STRING, ARRAY<STRUCT<mname: STRING, mnumber: INT>>>>>)\n" + " USING ICEBERG TBLPROPERTIES ('write.format.default'='%s', 'format-version' = %s)", sparkTableName, storageFormat, specVersion));
    onSpark().executeQuery(format("INSERT INTO TABLE %s SELECT" + "  'Doc213'" + ", map('s1', array(named_struct('sname', 'ASName1', 'snumber', 201), named_struct('sname', 'ASName2', 'snumber', 202)))" + ", array(map('m1', array(named_struct('mname', 'MAS1Name1', 'mnumber', 301), named_struct('mname', 'MAS1Name2', 'mnumber', 302)))" + "       ,map('m2', array(named_struct('mname', 'MAS2Name1', 'mnumber', 401), named_struct('mname', 'MAS2Name2', 'mnumber', 402))))" + ", named_struct('name', 'S1'," + "               'complicated', array(map('m1', array(named_struct('mname', 'SAMA1Name1', 'mnumber', 301), named_struct('mname', 'SAMA1Name2', 'mnumber', 302)))" + "                                   ,map('m2', array(named_struct('mname', 'SAMA2Name1', 'mnumber', 401), named_struct('mname', 'SAMA2Name2', 'mnumber', 402)))))", sparkTableName));
    // Expected mix of values plucked from different nesting levels; both engines must agree on it.
    Row row = row("Doc213", "ASName2", 201, "MAS2Name1", 302, "SAMA1Name1", 402);
    // Spark array subscripts are 0-based.
    assertThat(onSpark().executeQuery("SELECT" + "  doc_id" + ", nested_map['s1'][1].sname" + ", nested_map['s1'][0].snumber" + ", nested_array[1]['m2'][0].mname" + ", nested_array[0]['m1'][1].mnumber" + ", nested_struct.complicated[0]['m1'][0].mname" + ", nested_struct.complicated[1]['m2'][1].mnumber" + "  FROM " + sparkTableName)).containsOnly(row);
    // Trino array subscripts are 1-based, so every index here is one higher than in the Spark query above
    // while addressing exactly the same elements.
    assertThat(onTrino().executeQuery("SELECT" + "  doc_id" + ", nested_map['s1'][2].sname" + ", nested_map['s1'][1].snumber" + ", nested_array[2]['m2'][1].mname" + ", nested_array[1]['m1'][2].mnumber" + ", nested_struct.complicated[1]['m1'][1].mname" + ", nested_struct.complicated[2]['m2'][2].mnumber" + "  FROM " + trinoTableName)).containsOnly(row);
    onSpark().executeQuery("DROP TABLE " + sparkTableName);
}
Also used : Row(io.trino.tempto.assertions.QueryAssert.Row) Test(org.testng.annotations.Test) ProductTest(io.trino.tempto.ProductTest)

Example 15 with Row

use of io.trino.tempto.assertions.QueryAssert.Row in project trino by trinodb.

From the class TestIcebergSparkCompatibility, method testSparkReadingTrinoData:

@Test(groups = { ICEBERG, PROFILE_SPECIFIC_TESTS }, dataProvider = "testSparkReadingTrinoDataDataProvider")
public void testSparkReadingTrinoData(StorageFormat storageFormat, CreateMode createMode) {
    String baseTableName = "test_spark_reading_primitive_types_" + storageFormat + "_" + createMode;
    String trinoTableName = trinoTableName(baseTableName);
    String sparkTableName = sparkTableName(baseTableName);
    // One row covering the primitive types Trino writes to Iceberg. The timestamp (without time
    // zone) and time columns are commented out because Spark does not support them for Iceberg.
    String namedValues = "SELECT " + "  VARCHAR 'a_string' _string " + ", 1000000000000000 _bigint " + ", 1000000000 _integer " + ", REAL '10000000.123' _real " + ", DOUBLE '100000000000.123' _double " + ", DECIMAL '123456.78' _short_decimal " + ", DECIMAL '1234567890123456789.0123456789012345678' _long_decimal " + ", true _boolean " + // ", TIMESTAMP '2020-06-28 14:16:00.456' _timestamp " +
    ", TIMESTAMP '2021-08-03 08:32:21.123456 Europe/Warsaw' _timestamptz " + ", DATE '1950-06-28' _date " + ", X'000102f0feff' _binary " + // ", TIME '01:23:45.123456' _time " +
    "";
    // Populate the table via the requested creation path; all three paths must produce identical data.
    switch(createMode) {
        case CREATE_TABLE_AND_INSERT:
            onTrino().executeQuery(format("CREATE TABLE %s (" + "  _string VARCHAR" + ", _bigint BIGINT" + ", _integer INTEGER" + ", _real REAL" + ", _double DOUBLE" + ", _short_decimal decimal(8,2)" + ", _long_decimal decimal(38,19)" + ", _boolean BOOLEAN" + // ", _timestamp TIMESTAMP" -- per https://iceberg.apache.org/spark-writes/ Iceberg's timestamp is currently not supported with Spark
            ", _timestamptz timestamp(6) with time zone" + ", _date DATE" + ", _binary VARBINARY" + // ", _time time(6)" + -- per https://iceberg.apache.org/spark-writes/ Iceberg's time is currently not supported with Spark
            ") WITH (format = '%s')", trinoTableName, storageFormat));
            onTrino().executeQuery(format("INSERT INTO %s %s", trinoTableName, namedValues));
            break;
        case CREATE_TABLE_AS_SELECT:
            onTrino().executeQuery(format("CREATE TABLE %s AS %s", trinoTableName, namedValues));
            break;
        case CREATE_TABLE_WITH_NO_DATA_AND_INSERT:
            onTrino().executeQuery(format("CREATE TABLE %s AS %s WITH NO DATA", trinoTableName, namedValues));
            onTrino().executeQuery(format("INSERT INTO %s %s", trinoTableName, namedValues));
            break;
        default:
            throw new UnsupportedOperationException("Unsupported create mode: " + createMode);
    }
    // Expected values: the timestamptz literal above was written as Europe/Warsaw and is
    // asserted here in UTC (06:32 = 08:32 CEST), and the X'000102f0feff' literal as signed bytes.
    Row row = row("a_string", 1000000000000000L, 1000000000, 10000000.123F, 100000000000.123, new BigDecimal("123456.78"), new BigDecimal("1234567890123456789.0123456789012345678"), true, // Iceberg's timestamptz stores point in time, without zone
    "2021-08-03 06:32:21.123456 UTC", "1950-06-28", new byte[] { 00, 01, 02, -16, -2, -1 });
    // Both engines must read back the same row; timestamptz/date are cast to text for a stable comparison.
    assertThat(onTrino().executeQuery("SELECT " + "  _string" + ", _bigint" + ", _integer" + ", _real" + ", _double" + ", _short_decimal" + ", _long_decimal" + ", _boolean" + // _timestamp OR CAST(_timestamp AS varchar)
    ", CAST(_timestamptz AS varchar)" + ", CAST(_date AS varchar)" + ", _binary" + // ", CAST(_time AS varchar)" +
    " FROM " + trinoTableName)).containsOnly(row);
    assertThat(onSpark().executeQuery("SELECT " + "  _string" + ", _bigint" + ", _integer" + ", _real" + ", _double" + ", _short_decimal" + ", _long_decimal" + ", _boolean" + // _timestamp OR CAST(_timestamp AS string)
    ", CAST(_timestamptz AS string) || ' UTC'" + // Iceberg timestamptz is mapped to Spark timestamp and gets represented without time zone
    ", CAST(_date AS string)" + ", _binary" + // ", CAST(_time AS string)" +
    " FROM " + sparkTableName)).containsOnly(row);
    onTrino().executeQuery("DROP TABLE " + trinoTableName);
}
Also used : Row(io.trino.tempto.assertions.QueryAssert.Row) BigDecimal(java.math.BigDecimal) Test(org.testng.annotations.Test) ProductTest(io.trino.tempto.ProductTest)

Aggregations

Row (io.trino.tempto.assertions.QueryAssert.Row)19 Test (org.testng.annotations.Test)16 ProductTest (io.trino.tempto.ProductTest)14 QueryResult (io.trino.tempto.query.QueryResult)9 ImmutableList.toImmutableList (com.google.common.collect.ImmutableList.toImmutableList)5 Row.row (io.trino.tempto.assertions.QueryAssert.Row.row)3 QueryAssert.assertThat (io.trino.tempto.assertions.QueryAssert.assertThat)3 QueryExecutionException (io.trino.tempto.query.QueryExecutionException)3 Flaky (io.trino.testng.services.Flaky)3 HMS_ONLY (io.trino.tests.product.TestGroups.HMS_ONLY)3 STORAGE_FORMATS (io.trino.tests.product.TestGroups.STORAGE_FORMATS)3 ERROR_COMMITTING_WRITE_TO_HIVE_ISSUE (io.trino.tests.product.hive.HiveProductTest.ERROR_COMMITTING_WRITE_TO_HIVE_ISSUE)3 ERROR_COMMITTING_WRITE_TO_HIVE_MATCH (io.trino.tests.product.hive.HiveProductTest.ERROR_COMMITTING_WRITE_TO_HIVE_MATCH)3 QueryExecutors.onHive (io.trino.tests.product.utils.QueryExecutors.onHive)3 QueryExecutors.onTrino (io.trino.tests.product.utils.QueryExecutors.onTrino)3 String.format (java.lang.String.format)3 MoreObjects.toStringHelper (com.google.common.base.MoreObjects.toStringHelper)2 Splitter (com.google.common.base.Splitter)2 Verify.verify (com.google.common.base.Verify.verify)2 ImmutableMap (com.google.common.collect.ImmutableMap)2