Use of io.trino.tempto.assertions.QueryAssert.Row in project trino by trinodb.
Example from class TestIcebergSparkCompatibility, method testMigratedDataWithAlteredSchema.
@Test(groups = { ICEBERG, PROFILE_SPECIFIC_TESTS }, dataProvider = "storageFormats")
public void testMigratedDataWithAlteredSchema(StorageFormat storageFormat)
{
    String baseTableName = "test_migrated_data_with_altered_schema_" + randomTableSuffix();
    String defaultCatalogTableName = sparkDefaultCatalogTableName(baseTableName);
    String sparkTableDefinition = "" +
            "CREATE TABLE %s (\n" +
            " doc_id STRING\n" +
            ", nested_struct STRUCT<id:INT, name:STRING, address:STRUCT<a:INT, b:STRING>>)\n" +
            " USING %s";
    onSpark().executeQuery(format(sparkTableDefinition, defaultCatalogTableName, storageFormat.name()));
    String insert = "" +
            "INSERT INTO TABLE %s SELECT" +
            " 'Doc213'" +
            ", named_struct('id', 1, 'name', 'P. Sherman', 'address', named_struct('a', 42, 'b', 'Wallaby Way'))";
    onSpark().executeQuery(format(insert, defaultCatalogTableName));
    onSpark().executeQuery(format("CALL system.migrate('%s')", defaultCatalogTableName));
    String sparkTableName = sparkTableName(baseTableName);
    onSpark().executeQuery("ALTER TABLE " + sparkTableName + " RENAME COLUMN nested_struct TO nested_struct_moved");
    String select = "SELECT" +
            " nested_struct_moved.name" +
            ", nested_struct_moved.address.a" +
            ", nested_struct_moved.address.b" +
            " FROM ";
    Row row = row("P. Sherman", 42, "Wallaby Way");
    QueryResult sparkResult = onSpark().executeQuery(select + sparkTableName);
    assertThat(sparkResult).containsOnly(ImmutableList.of(row));
    String trinoTableName = trinoTableName(baseTableName);
    assertThat(onTrino().executeQuery(select + trinoTableName)).containsOnly(ImmutableList.of(row));
    // After removing the name mapping, columns from migrated files should be null since they are missing the Iceberg field IDs
    onSpark().executeQuery(format("ALTER TABLE %s UNSET TBLPROPERTIES ('schema.name-mapping.default')", sparkTableName));
    assertThat(onTrino().executeQuery(select + trinoTableName)).containsOnly(row(null, null, null));
}
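
The name mapping removed at the end of this test is what makes the migrated files readable in the first place: files written before the migration carry no Iceberg field IDs, so engines fall back to the 'schema.name-mapping.default' table property to resolve columns by name. A minimal sketch of inspecting that property (assuming the same tempto harness and a sparkTableName in scope; this inspection is not part of the original test):

    // A minimal sketch, assuming the same tempto harness as the test above.
    // CALL system.migrate(...) attaches a 'schema.name-mapping.default' property
    // that maps the original column names to Iceberg field IDs.
    QueryResult nameMapping = onSpark().executeQuery(
            "SHOW TBLPROPERTIES " + sparkTableName + " ('schema.name-mapping.default')");
    // The value is a JSON document along the lines of [{"field-id": 1, "names": ["doc_id"]}, ...]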
Use of io.trino.tempto.assertions.QueryAssert.Row in project trino by trinodb.
Example from class TestIcebergSparkCompatibility, method testTrinoWritingDataWithObjectStorageLocationProvider.
@Test(groups = { ICEBERG, PROFILE_SPECIFIC_TESTS }, dataProvider = "storageFormatsWithSpecVersion")
public void testTrinoWritingDataWithObjectStorageLocationProvider(StorageFormat storageFormat, int specVersion)
{
    String baseTableName = "test_object_storage_location_provider_" + storageFormat;
    String sparkTableName = sparkTableName(baseTableName);
    String trinoTableName = trinoTableName(baseTableName);
    String dataPath = "hdfs://hadoop-master:9000/user/hive/warehouse/test_object_storage_location_provider/obj-data";
    onSpark().executeQuery(format(
            "CREATE TABLE %s (_string STRING, _bigint BIGINT) USING ICEBERG TBLPROPERTIES (" +
                    "'write.object-storage.enabled'=true," +
                    "'write.object-storage.path'='%s'," +
                    "'write.format.default' = '%s'," +
                    "'format-version' = %s)",
            sparkTableName, dataPath, storageFormat, specVersion));
    onTrino().executeQuery(format("INSERT INTO %s VALUES ('a_string', 1000000000000000)", trinoTableName));
    Row result = row("a_string", 1000000000000000L);
    assertThat(onSpark().executeQuery(format("SELECT _string, _bigint FROM %s", sparkTableName))).containsOnly(result);
    assertThat(onTrino().executeQuery(format("SELECT _string, _bigint FROM %s", trinoTableName))).containsOnly(result);
    QueryResult queryResult = onTrino().executeQuery(format("SELECT file_path FROM %s", trinoTableName("\"" + baseTableName + "$files\"")));
    assertThat(queryResult).hasRowsCount(1).hasColumnsCount(1);
    assertTrue(((String) queryResult.row(0).get(0)).contains(dataPath));
    // TODO: support path override in Iceberg table creation: https://github.com/trinodb/trino/issues/8861
    assertQueryFailure(() -> onTrino().executeQuery("DROP TABLE " + trinoTableName))
            .hasMessageContaining("contains Iceberg path override properties and cannot be dropped from Trino");
    onSpark().executeQuery("DROP TABLE " + sparkTableName);
}
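
The $files metadata table queried above exposes one row per live data file, which is how the test verifies that Iceberg's object-storage location provider actually wrote under the configured path. A minimal sketch (assuming the same tempto harness, with baseTableName and dataPath in scope; java.util.List imported) that checks every file rather than just the first row:

    // A minimal sketch, assuming the same tempto harness as the test above.
    // The $files metadata table returns one row per data file; verify that
    // each file landed under the configured object-storage path.
    QueryResult files = onTrino().executeQuery(
            format("SELECT file_path FROM %s", trinoTableName("\"" + baseTableName + "$files\"")));
    for (List<?> fileRow : files.rows()) {
        String path = (String) fileRow.get(0);
        assertTrue(path.contains(dataPath), "unexpected file location: " + path);
    }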
Use of io.trino.tempto.assertions.QueryAssert.Row in project trino by trinodb.
Example from class TestIcebergSparkCompatibility, method testSparkReadsTrinoPartitionedTable.
@Test(groups = { ICEBERG, PROFILE_SPECIFIC_TESTS }, dataProvider = "storageFormats")
public void testSparkReadsTrinoPartitionedTable(StorageFormat storageFormat)
{
    String baseTableName = "test_spark_reads_trino_partitioned_table_" + storageFormat;
    String trinoTableName = trinoTableName(baseTableName);
    String sparkTableName = sparkTableName(baseTableName);
    onTrino().executeQuery("DROP TABLE IF EXISTS " + trinoTableName);
    onTrino().executeQuery(format(
            "CREATE TABLE %s (_string VARCHAR, _varbinary VARBINARY, _bigint BIGINT) " +
                    "WITH (partitioning = ARRAY['_string', '_varbinary'], format = '%s')",
            trinoTableName, storageFormat));
    onTrino().executeQuery(format("INSERT INTO %s VALUES ('a', X'0ff102f0feff', 1001), ('b', X'0ff102f0fefe', 1002), ('c', X'0ff102fdfeff', 1003)", trinoTableName));
    Row row1 = row("b", new byte[] { 15, -15, 2, -16, -2, -2 }, 1002);
    String selectByString = "SELECT * FROM %s WHERE _string = 'b'";
    assertThat(onTrino().executeQuery(format(selectByString, trinoTableName))).containsOnly(row1);
    assertThat(onSpark().executeQuery(format(selectByString, sparkTableName))).containsOnly(row1);
    Row row2 = row("a", new byte[] { 15, -15, 2, -16, -2, -1 }, 1001);
    String selectByVarbinary = "SELECT * FROM %s WHERE _varbinary = X'0ff102f0feff'";
    assertThat(onTrino().executeQuery(format(selectByVarbinary, trinoTableName))).containsOnly(row2);
    // For now this fails on Spark; see https://github.com/apache/iceberg/issues/2934
    assertQueryFailure(() -> onSpark().executeQuery(format(selectByVarbinary, sparkTableName)))
            .hasMessageContaining("Cannot convert bytes to SQL literal: java.nio.HeapByteBuffer[pos=0 lim=6 cap=6]");
    onTrino().executeQuery("DROP TABLE " + trinoTableName);
}
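
The Spark failure asserted above comes from Spark/Iceberg being unable to turn a byte-array literal into a SQL literal for the pushed-down filter (apache/iceberg#2934). One hedged workaround sketch, not part of the original test and untested here, is to keep the binary literal out of the predicate by comparing hex strings instead (Spark's hex() renders uppercase hex), trading partition pruning for a scan:

    // A hedged workaround sketch, reusing row2 and sparkTableName from the test above.
    // hex(_varbinary) avoids a byte-array literal in the predicate, so nothing
    // needs to be converted for pushdown; the filter is evaluated on scanned rows.
    String selectByHex = "SELECT * FROM %s WHERE hex(_varbinary) = '0FF102F0FEFF'";
    assertThat(onSpark().executeQuery(format(selectByHex, sparkTableName))).containsOnly(row2);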
Use of io.trino.tempto.assertions.QueryAssert.Row in project trino by trinodb.
Example from class TestIcebergSparkCompatibility, method testTrinoReadingNestedSparkData.
@Test(groups = { ICEBERG, PROFILE_SPECIFIC_TESTS }, dataProvider = "storageFormatsWithSpecVersion")
public void testTrinoReadingNestedSparkData(StorageFormat storageFormat, int specVersion)
{
    String baseTableName = "test_trino_reading_nested_spark_data_" + storageFormat;
    String trinoTableName = trinoTableName(baseTableName);
    String sparkTableName = sparkTableName(baseTableName);
    onSpark().executeQuery(format(
            "CREATE TABLE %s (\n" +
                    " doc_id STRING\n" +
                    ", nested_map MAP<STRING, ARRAY<STRUCT<sname: STRING, snumber: INT>>>\n" +
                    ", nested_array ARRAY<MAP<STRING, ARRAY<STRUCT<mname: STRING, mnumber: INT>>>>\n" +
                    ", nested_struct STRUCT<name:STRING, complicated: ARRAY<MAP<STRING, ARRAY<STRUCT<mname: STRING, mnumber: INT>>>>>)\n" +
                    " USING ICEBERG TBLPROPERTIES ('write.format.default'='%s', 'format-version' = %s)",
            sparkTableName, storageFormat, specVersion));
    onSpark().executeQuery(format(
            "INSERT INTO TABLE %s SELECT" +
                    " 'Doc213'" +
                    ", map('s1', array(named_struct('sname', 'ASName1', 'snumber', 201), named_struct('sname', 'ASName2', 'snumber', 202)))" +
                    ", array(map('m1', array(named_struct('mname', 'MAS1Name1', 'mnumber', 301), named_struct('mname', 'MAS1Name2', 'mnumber', 302)))" +
                    " ,map('m2', array(named_struct('mname', 'MAS2Name1', 'mnumber', 401), named_struct('mname', 'MAS2Name2', 'mnumber', 402))))" +
                    ", named_struct('name', 'S1'," +
                    " 'complicated', array(map('m1', array(named_struct('mname', 'SAMA1Name1', 'mnumber', 301), named_struct('mname', 'SAMA1Name2', 'mnumber', 302)))" +
                    " ,map('m2', array(named_struct('mname', 'SAMA2Name1', 'mnumber', 401), named_struct('mname', 'SAMA2Name2', 'mnumber', 402)))))",
            sparkTableName));
    Row row = row("Doc213", "ASName2", 201, "MAS2Name1", 302, "SAMA1Name1", 402);
    assertThat(onSpark().executeQuery(
            "SELECT" +
                    " doc_id" +
                    ", nested_map['s1'][1].sname" +
                    ", nested_map['s1'][0].snumber" +
                    ", nested_array[1]['m2'][0].mname" +
                    ", nested_array[0]['m1'][1].mnumber" +
                    ", nested_struct.complicated[0]['m1'][0].mname" +
                    ", nested_struct.complicated[1]['m2'][1].mnumber" +
                    " FROM " + sparkTableName))
            .containsOnly(row);
    assertThat(onTrino().executeQuery(
            "SELECT" +
                    " doc_id" +
                    ", nested_map['s1'][2].sname" +
                    ", nested_map['s1'][1].snumber" +
                    ", nested_array[2]['m2'][1].mname" +
                    ", nested_array[1]['m1'][2].mnumber" +
                    ", nested_struct.complicated[1]['m1'][1].mname" +
                    ", nested_struct.complicated[2]['m2'][2].mnumber" +
                    " FROM " + trinoTableName))
            .containsOnly(row);
    onSpark().executeQuery("DROP TABLE " + sparkTableName);
}
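
Note the subscript shift between the two SELECTs: Spark SQL arrays are 0-based while Trino arrays are 1-based, so the Trino query addresses the same elements with every array index incremented by one (the map keys are unaffected). A minimal, self-contained sketch of that translation (the class and helper names are hypothetical, not part of the test class):

    public class ArrayIndexBases
    {
        // Spark subscript i and Trino subscript i + 1 address the same array element.
        static int toTrinoIndex(int sparkIndex)
        {
            return sparkIndex + 1;
        }

        public static void main(String[] args)
        {
            // nested_array[1]['m2'][0].mname in Spark is nested_array[2]['m2'][1].mname in Trino
            System.out.println(toTrinoIndex(1)); // prints 2
        }
    }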
Use of io.trino.tempto.assertions.QueryAssert.Row in project trino by trinodb.
Example from class TestIcebergSparkCompatibility, method testSparkReadingTrinoData.
@Test(groups = { ICEBERG, PROFILE_SPECIFIC_TESTS }, dataProvider = "testSparkReadingTrinoDataDataProvider")
public void testSparkReadingTrinoData(StorageFormat storageFormat, CreateMode createMode)
{
    String baseTableName = "test_spark_reading_primitive_types_" + storageFormat + "_" + createMode;
    String trinoTableName = trinoTableName(baseTableName);
    String sparkTableName = sparkTableName(baseTableName);
    String namedValues = "SELECT " +
            " VARCHAR 'a_string' _string " +
            ", 1000000000000000 _bigint " +
            ", 1000000000 _integer " +
            ", REAL '10000000.123' _real " +
            ", DOUBLE '100000000000.123' _double " +
            ", DECIMAL '123456.78' _short_decimal " +
            ", DECIMAL '1234567890123456789.0123456789012345678' _long_decimal " +
            ", true _boolean " +
            // ", TIMESTAMP '2020-06-28 14:16:00.456' _timestamp " +
            ", TIMESTAMP '2021-08-03 08:32:21.123456 Europe/Warsaw' _timestamptz " +
            ", DATE '1950-06-28' _date " +
            ", X'000102f0feff' _binary " +
            // ", TIME '01:23:45.123456' _time " +
            "";
    switch (createMode) {
        case CREATE_TABLE_AND_INSERT:
            onTrino().executeQuery(format(
                    "CREATE TABLE %s (" +
                            " _string VARCHAR" +
                            ", _bigint BIGINT" +
                            ", _integer INTEGER" +
                            ", _real REAL" +
                            ", _double DOUBLE" +
                            ", _short_decimal decimal(8,2)" +
                            ", _long_decimal decimal(38,19)" +
                            ", _boolean BOOLEAN" +
                            // ", _timestamp TIMESTAMP" -- per https://iceberg.apache.org/spark-writes/ Iceberg's timestamp is currently not supported with Spark
                            ", _timestamptz timestamp(6) with time zone" +
                            ", _date DATE" +
                            ", _binary VARBINARY" +
                            // ", _time time(6)" -- per https://iceberg.apache.org/spark-writes/ Iceberg's time is currently not supported with Spark
                            ") WITH (format = '%s')",
                    trinoTableName, storageFormat));
            onTrino().executeQuery(format("INSERT INTO %s %s", trinoTableName, namedValues));
            break;
        case CREATE_TABLE_AS_SELECT:
            onTrino().executeQuery(format("CREATE TABLE %s AS %s", trinoTableName, namedValues));
            break;
        case CREATE_TABLE_WITH_NO_DATA_AND_INSERT:
            onTrino().executeQuery(format("CREATE TABLE %s AS %s WITH NO DATA", trinoTableName, namedValues));
            onTrino().executeQuery(format("INSERT INTO %s %s", trinoTableName, namedValues));
            break;
        default:
            throw new UnsupportedOperationException("Unsupported create mode: " + createMode);
    }
    Row row = row(
            "a_string",
            1000000000000000L,
            1000000000,
            10000000.123F,
            100000000000.123,
            new BigDecimal("123456.78"),
            new BigDecimal("1234567890123456789.0123456789012345678"),
            true,
            // Iceberg's timestamptz stores a point in time, without zone
            "2021-08-03 06:32:21.123456 UTC",
            "1950-06-28",
            new byte[] { 0, 1, 2, -16, -2, -1 });
    assertThat(onTrino().executeQuery(
            "SELECT " +
                    " _string" +
                    ", _bigint" +
                    ", _integer" +
                    ", _real" +
                    ", _double" +
                    ", _short_decimal" +
                    ", _long_decimal" +
                    ", _boolean" +
                    // _timestamp OR CAST(_timestamp AS varchar)
                    ", CAST(_timestamptz AS varchar)" +
                    ", CAST(_date AS varchar)" +
                    ", _binary" +
                    // ", CAST(_time AS varchar)" +
                    " FROM " + trinoTableName))
            .containsOnly(row);
    assertThat(onSpark().executeQuery(
            "SELECT " +
                    " _string" +
                    ", _bigint" +
                    ", _integer" +
                    ", _real" +
                    ", _double" +
                    ", _short_decimal" +
                    ", _long_decimal" +
                    ", _boolean" +
                    // _timestamp OR CAST(_timestamp AS string)
                    ", CAST(_timestamptz AS string) || ' UTC'" + // Iceberg timestamptz is mapped to Spark timestamp and gets represented without time zone
                    ", CAST(_date AS string)" +
                    ", _binary" +
                    // ", CAST(_time AS string)" +
                    " FROM " + sparkTableName))
            .containsOnly(row);
    onTrino().executeQuery("DROP TABLE " + trinoTableName);
}
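
The `|| ' UTC'` appended to Spark's output exists because both engines see the same instant but render it differently: the value inserted as 2021-08-03 08:32:21.123456 Europe/Warsaw is stored by Iceberg as the UTC instant 06:32:21.123456, which Trino prints with a " UTC" suffix while Spark maps it to its zone-less timestamp type. A minimal, self-contained sketch of that normalization using plain java.time, independent of the test harness (the class name is hypothetical):

    import java.time.Instant;
    import java.time.ZoneOffset;
    import java.time.format.DateTimeFormatter;

    public class TimestamptzRendering
    {
        public static void main(String[] args)
        {
            // The inserted Europe/Warsaw value corresponds to this UTC instant
            Instant instant = Instant.parse("2021-08-03T06:32:21.123456Z");
            DateTimeFormatter formatter = DateTimeFormatter
                    .ofPattern("yyyy-MM-dd HH:mm:ss.SSSSSS")
                    .withZone(ZoneOffset.UTC);
            String sparkStyle = formatter.format(instant); // zone-less, as Spark renders it
            String trinoStyle = sparkStyle + " UTC";       // the suffix the test appends
            System.out.println(trinoStyle);                // 2021-08-03 06:32:21.123456 UTC
        }
    }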