Use of io.trino.tempto.assertions.QueryAssert.Row in project trino by trinodb.
From the class TestIcebergSparkCompatibility, method testTrinoReadingMigratedNestedData.
@Test(groups = { ICEBERG, PROFILE_SPECIFIC_TESTS }, dataProvider = "storageFormats")
public void testTrinoReadingMigratedNestedData(StorageFormat storageFormat)
{
    String baseTableName = "test_trino_reading_migrated_nested_data_" + randomTableSuffix();
    String defaultCatalogTableName = sparkDefaultCatalogTableName(baseTableName);

    String sparkTableDefinition = "" +
            "CREATE TABLE %s (\n" +
            " doc_id STRING\n" +
            ", nested_map MAP<STRING, ARRAY<STRUCT<sName: STRING, sNumber: INT>>>\n" +
            ", nested_array ARRAY<MAP<STRING, ARRAY<STRUCT<mName: STRING, mNumber: INT>>>>\n" +
            ", nested_struct STRUCT<id:INT, name:STRING, address:STRUCT<street_number:INT, street_name:STRING>>)\n" +
            " USING %s";
    onSpark().executeQuery(format(sparkTableDefinition, defaultCatalogTableName, storageFormat.name()));

    String insert = "" +
            "INSERT INTO TABLE %s SELECT" +
            " 'Doc213'" +
            ", map('s1', array(named_struct('sName', 'ASName1', 'sNumber', 201), named_struct('sName', 'ASName2', 'sNumber', 202)))" +
            ", array(map('m1', array(named_struct('mName', 'MAS1Name1', 'mNumber', 301), named_struct('mName', 'MAS1Name2', 'mNumber', 302)))" +
            " ,map('m2', array(named_struct('mName', 'MAS2Name1', 'mNumber', 401), named_struct('mName', 'MAS2Name2', 'mNumber', 402))))" +
            ", named_struct('id', 1, 'name', 'P. Sherman', 'address', named_struct('street_number', 42, 'street_name', 'Wallaby Way'))";
    onSpark().executeQuery(format(insert, defaultCatalogTableName));
    onSpark().executeQuery(format("CALL system.migrate('%s')", defaultCatalogTableName));

    String sparkTableName = sparkTableName(baseTableName);
    Row row = row("Doc213", "ASName2", 201, "MAS2Name1", 302, "P. Sherman", 42, "Wallaby Way");

    String sparkSelect = "SELECT" +
            " doc_id" +
            ", nested_map['s1'][1].sName" +
            ", nested_map['s1'][0].sNumber" +
            ", nested_array[1]['m2'][0].mName" +
            ", nested_array[0]['m1'][1].mNumber" +
            ", nested_struct.name" +
            ", nested_struct.address.street_number" +
            ", nested_struct.address.street_name" +
            " FROM ";
    QueryResult sparkResult = onSpark().executeQuery(sparkSelect + sparkTableName);
    // The Spark behavior when the default name mapping does not exist is not consistent
    assertThat(sparkResult).containsOnly(row);

    String trinoSelect = "SELECT" +
            " doc_id" +
            ", nested_map['s1'][2].sName" +
            ", nested_map['s1'][1].sNumber" +
            ", nested_array[2]['m2'][1].mName" +
            ", nested_array[1]['m1'][2].mNumber" +
            ", nested_struct.name" +
            ", nested_struct.address.street_number" +
            ", nested_struct.address.street_name" +
            " FROM ";
    String trinoTableName = trinoTableName(baseTableName);
    QueryResult trinoResult = onTrino().executeQuery(trinoSelect + trinoTableName);
    assertThat(trinoResult).containsOnly(row);

    // After removing the name mapping, columns from migrated files should be null since they are missing the Iceberg Field IDs
    onSpark().executeQuery(format("ALTER TABLE %s UNSET TBLPROPERTIES ('schema.name-mapping.default')", sparkTableName));
    assertThat(onTrino().executeQuery(trinoSelect + trinoTableName)).containsOnly(row(null, null, null, null, null, null, null, null));
    assertThat(onTrino().executeQuery("SELECT * FROM " + trinoTableName)).containsOnly(row(null, null, null, null));
    assertThat(onTrino().executeQuery("SELECT nested_struct.address.street_number, nested_struct.address.street_name FROM " + trinoTableName)).containsOnly(row(null, null));
}
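Note the deliberate off-by-one between the two SELECT statements above: Spark's array subscript operator is 0-based, while Trino's is 1-based, so the same logical element is addressed with different indexes in each engine. A minimal sketch of that correspondence, assuming a hypothetical table example_table with the column layout created above (onSpark(), onTrino(), assertThat() and row() are the same harness helpers used throughout these tests):

// Both queries read the same element: the first struct in nested_map['s1'].
// Spark subscripts arrays from 0; Trino subscripts the same array from 1.
assertThat(onSpark().executeQuery("SELECT nested_map['s1'][0].sName FROM example_table"))
        .containsOnly(row("ASName1"));
assertThat(onTrino().executeQuery("SELECT nested_map['s1'][1].sName FROM example_table"))
        .containsOnly(row("ASName1"));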
Use of io.trino.tempto.assertions.QueryAssert.Row in project trino by trinodb.
From the class TestIcebergSparkCompatibility, method testTrinoSparkConcurrentInsert.
/**
* @see TestIcebergInsert#testIcebergConcurrentInsert()
*/
@Test(groups = { ICEBERG, PROFILE_SPECIFIC_TESTS }, timeOut = 60_000)
public void testTrinoSparkConcurrentInsert()
        throws Exception
{
    int insertsPerEngine = 7;

    String baseTableName = "trino_spark_insert_concurrent_" + randomTableSuffix();
    String trinoTableName = trinoTableName(baseTableName);
    String sparkTableName = sparkTableName(baseTableName);
    onTrino().executeQuery("CREATE TABLE " + trinoTableName + "(e varchar, a bigint)");

    ExecutorService executor = Executors.newFixedThreadPool(2);
    try {
        CyclicBarrier barrier = new CyclicBarrier(2);
        QueryExecutor onTrino = onTrino();
        QueryExecutor onSpark = onSpark();
        List<Row> allInserted = executor.invokeAll(
                Stream.of(Engine.TRINO, Engine.SPARK)
                        .map(engine -> (Callable<List<Row>>) () -> {
                            List<Row> inserted = new ArrayList<>();
                            for (int i = 0; i < insertsPerEngine; i++) {
                                barrier.await(20, SECONDS);
                                String engineName = engine.name().toLowerCase(ENGLISH);
                                long value = i;
                                switch (engine) {
                                    case TRINO:
                                        try {
                                            onTrino.executeQuery(format("INSERT INTO %s VALUES ('%s', %d)", trinoTableName, engineName, value));
                                        }
                                        catch (QueryExecutionException queryExecutionException) {
                                            // failed to insert; go to next loop iteration
                                            continue;
                                        }
                                        break;
                                    case SPARK:
                                        onSpark.executeQuery(format("INSERT INTO %s VALUES ('%s', %d)", sparkTableName, engineName, value));
                                        break;
                                    default:
                                        throw new UnsupportedOperationException("Unexpected engine: " + engine);
                                }
                                inserted.add(row(engineName, value));
                            }
                            return inserted;
                        })
                        .collect(toImmutableList())).stream()
                .map(MoreFutures::getDone)
                .flatMap(List::stream)
                .collect(toImmutableList());

        // At least one INSERT per round should succeed
        Assertions.assertThat(allInserted).hasSizeBetween(insertsPerEngine, insertsPerEngine * 2);

        // All Spark inserts should succeed (and not be obliterated)
        assertThat(onTrino().executeQuery("SELECT count(*) FROM " + trinoTableName + " WHERE e = 'spark'")).containsOnly(row(insertsPerEngine));

        assertThat(onTrino().executeQuery("SELECT * FROM " + trinoTableName)).containsOnly(allInserted);

        onTrino().executeQuery("DROP TABLE " + trinoTableName);
    }
    finally {
        executor.shutdownNow();
    }
}
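The CyclicBarrier is what makes this a genuine conflict test: both writers block at barrier.await(...) and release together, so each round's Trino and Spark INSERTs land at roughly the same moment, and only the Trino side is allowed to fail (hence the try/catch and the size bound of insertsPerEngine to insertsPerEngine * 2). A self-contained sketch of the same lockstep pattern, stripped of the query engines (the class and worker names are invented for illustration):

import java.util.List;
import java.util.concurrent.Callable;
import java.util.concurrent.CyclicBarrier;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;

import static java.util.concurrent.TimeUnit.SECONDS;

public class LockstepDemo
{
    public static void main(String[] args)
            throws Exception
    {
        int rounds = 7;
        CyclicBarrier barrier = new CyclicBarrier(2);
        ExecutorService executor = Executors.newFixedThreadPool(2);
        try {
            // invokeAll blocks until both workers have finished all rounds
            executor.invokeAll(List.of(worker("a", barrier, rounds), worker("b", barrier, rounds)));
        }
        finally {
            executor.shutdownNow();
        }
    }

    private static Callable<Void> worker(String name, CyclicBarrier barrier, int rounds)
    {
        return () -> {
            for (int i = 0; i < rounds; i++) {
                // Neither worker proceeds until both have reached the barrier,
                // so the writes of a round happen at roughly the same time.
                barrier.await(20, SECONDS);
                System.out.printf("%s writes round %d%n", name, i);
            }
            return null;
        };
    }
}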
Use of io.trino.tempto.assertions.QueryAssert.Row in project trino by trinodb.
From the class TestIcebergSparkCompatibility, method testTrinoReadingSparkData.
@Test(groups = { ICEBERG, PROFILE_SPECIFIC_TESTS }, dataProvider = "storageFormatsWithSpecVersion")
public void testTrinoReadingSparkData(StorageFormat storageFormat, int specVersion)
{
    String baseTableName = "test_trino_reading_primitive_types_" + storageFormat;
    String trinoTableName = trinoTableName(baseTableName);
    String sparkTableName = sparkTableName(baseTableName);

    onSpark().executeQuery("DROP TABLE IF EXISTS " + sparkTableName);
    onSpark().executeQuery(format(
            "CREATE TABLE %s (" +
                    " _string STRING" +
                    ", _bigint BIGINT" +
                    ", _integer INTEGER" +
                    ", _real REAL" +
                    ", _double DOUBLE" +
                    ", _short_decimal decimal(8,2)" +
                    ", _long_decimal decimal(38,19)" +
                    ", _boolean BOOLEAN" +
                    ", _timestamp TIMESTAMP" +
                    ", _date DATE" +
                    ", _binary BINARY" +
                    ") USING ICEBERG " +
                    "TBLPROPERTIES ('write.format.default'='%s', 'format-version' = %s)",
            sparkTableName,
            storageFormat,
            specVersion));

    // Validate queries on an empty table created by Spark
    assertThat(onTrino().executeQuery(format("SELECT * FROM %s", trinoTableName("\"" + baseTableName + "$snapshots\"")))).hasNoRows();
    assertThat(onTrino().executeQuery(format("SELECT * FROM %s", trinoTableName))).hasNoRows();

    onSpark().executeQuery(format(
            "INSERT INTO %s VALUES (" +
                    "'a_string'" +
                    ", 1000000000000000" +
                    ", 1000000000" +
                    ", 10000000.123" +
                    ", 100000000000.123" +
                    ", CAST('123456.78' AS decimal(8,2))" +
                    ", CAST('1234567890123456789.0123456789012345678' AS decimal(38,19))" +
                    ", true" +
                    ", TIMESTAMP '2020-06-28 14:16:00.456'" +
                    ", DATE '1950-06-28'" +
                    ", X'000102f0feff'" +
                    ")",
            sparkTableName));

    Row row = row(
            "a_string",
            1000000000000000L,
            1000000000,
            10000000.123F,
            100000000000.123,
            new BigDecimal("123456.78"),
            new BigDecimal("1234567890123456789.0123456789012345678"),
            true,
            Timestamp.valueOf("2020-06-28 14:16:00.456"),
            Date.valueOf("1950-06-28"),
            new byte[] { 00, 01, 02, -16, -2, -1 });

    assertThat(onSpark().executeQuery(
            "SELECT " +
                    " _string" +
                    ", _bigint" +
                    ", _integer" +
                    ", _real" +
                    ", _double" +
                    ", _short_decimal" +
                    ", _long_decimal" +
                    ", _boolean" +
                    ", _timestamp" +
                    ", _date" +
                    ", _binary" +
                    " FROM " + sparkTableName))
            .containsOnly(row);

    assertThat(onTrino().executeQuery(
            "SELECT " +
                    " _string" +
                    ", _bigint" +
                    ", _integer" +
                    ", _real" +
                    ", _double" +
                    ", _short_decimal" +
                    ", _long_decimal" +
                    ", _boolean" +
                    // TODO test the value without a CAST from timestamp with time zone to timestamp
                    ", CAST(_timestamp AS TIMESTAMP)" +
                    ", _date" +
                    ", _binary" +
                    " FROM " + trinoTableName))
            .containsOnly(row);

    onSpark().executeQuery("DROP TABLE " + sparkTableName);
}
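The empty-table check above works because Trino exposes Iceberg metadata tables under a $ suffix, e.g. "<table>$snapshots". As a hypothetical follow-up in the same style, one could also assert that the INSERT committed exactly one snapshot (a sketch, not part of the original test):

// After the single INSERT, the snapshots metadata table should contain one entry.
assertThat(onTrino().executeQuery(
        format("SELECT count(*) FROM %s", trinoTableName("\"" + baseTableName + "$snapshots\""))))
        .containsOnly(row(1L));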
Use of io.trino.tempto.assertions.QueryAssert.Row in project trino by trinodb.
From the class TestHiveCoercion, method assertQueryResults.
private void assertQueryResults(
        Engine engine,
        String query,
        Map<String, List<Object>> expected,
        List<String> columns,
        int rowCount,
        String tableName)
{
    QueryResult actual = execute(engine, query);

    ImmutableList.Builder<Row> rowsBuilder = ImmutableList.builder();
    for (int row = 0; row < rowCount; row++) {
        // Avoid ImmutableList to allow nulls
        List<Object> currentRow = new ArrayList<>();
        for (int column = 0; column < columns.size(); column++) {
            String columnName = columns.get(column);
            checkArgument(expected.containsKey(columnName), "columnName should be present in expected results");
            currentRow.add(expected.get(columnName).get(row));
        }
        rowsBuilder.add(new Row(currentRow));
    }
    List<Row> expectedRows = rowsBuilder.build();

    assertColumnTypes(actual, tableName, engine, columns);
    for (int sqlIndex = 1; sqlIndex <= columns.size(); sqlIndex++) {
        String column = columns.get(sqlIndex - 1);
        if (column.equals("row_to_row") || column.equals("map_to_map")) {
            assertEqualsIgnoreOrder(actual.column(sqlIndex), column(expectedRows, sqlIndex), format("%s field is not equal", column));
            continue;
        }
        if (column.equals("list_to_list")) {
            assertEqualsIgnoreOrder(engine == Engine.TRINO ? extract(actual.column(sqlIndex)) : actual.column(sqlIndex), column(expectedRows, sqlIndex), "list_to_list field is not equal");
            continue;
        }
        // test primitive values
        assertThat(actual.project(sqlIndex)).containsOnly(project(expectedRows, sqlIndex));
    }
}
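A minimal sketch of how this helper might be invoked; the table, column name, and expected values below are hypothetical, and each list in the expected map must contain exactly rowCount entries, one per row in row order:

// Hypothetical expectations: one coerced column, two rows.
Map<String, List<Object>> expected = ImmutableMap.of(
        "tinyint_to_smallint", Arrays.asList((short) 1, (short) 2));
assertQueryResults(
        Engine.TRINO,
        "SELECT tinyint_to_smallint FROM example_coercion_table",
        expected,
        ImmutableList.of("tinyint_to_smallint"),
        2,
        "example_coercion_table");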
Use of io.trino.tempto.assertions.QueryAssert.Row in project trino by trinodb.
From the class TestIcebergSparkCompatibility, method testTrinoWritingDataWithWriterDataPathSet.
@Test(groups = { ICEBERG, PROFILE_SPECIFIC_TESTS }, dataProvider = "storageFormatsWithSpecVersion")
public void testTrinoWritingDataWithWriterDataPathSet(StorageFormat storageFormat, int specVersion)
{
    String baseTableName = "test_writer_data_path_" + storageFormat;
    String sparkTableName = sparkTableName(baseTableName);
    String trinoTableName = trinoTableName(baseTableName);
    String dataPath = "hdfs://hadoop-master:9000/user/hive/warehouse/test_writer_data_path_/obj-data";

    onSpark().executeQuery(format(
            "CREATE TABLE %s (_string STRING, _bigint BIGINT) USING ICEBERG TBLPROPERTIES (" +
                    "'write.data.path'='%s'," +
                    "'write.format.default' = '%s'," +
                    "'format-version' = %s)",
            sparkTableName,
            dataPath,
            storageFormat,
            specVersion));
    onTrino().executeQuery(format("INSERT INTO %s VALUES ('a_string', 1000000000000000)", trinoTableName));

    Row result = row("a_string", 1000000000000000L);
    assertThat(onSpark().executeQuery(format("SELECT _string, _bigint FROM %s", sparkTableName))).containsOnly(result);
    assertThat(onTrino().executeQuery(format("SELECT _string, _bigint FROM %s", trinoTableName))).containsOnly(result);

    QueryResult queryResult = onTrino().executeQuery(format("SELECT file_path FROM %s", trinoTableName("\"" + baseTableName + "$files\"")));
    assertThat(queryResult).hasRowsCount(1).hasColumnsCount(1);
    assertTrue(((String) queryResult.row(0).get(0)).contains(dataPath));

    assertQueryFailure(() -> onTrino().executeQuery("DROP TABLE " + trinoTableName))
            .hasMessageContaining("contains Iceberg path override properties and cannot be dropped from Trino");
    onSpark().executeQuery("DROP TABLE " + sparkTableName);
}
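The assertTrue near the end verifies that the override took effect: the single data file registered in the $files metadata table must live under the configured write.data.path. The same check can be written with AssertJ string assertions on the already-fetched queryResult:

// The one data file produced by the INSERT sits under the overridden data path.
Assertions.assertThat((String) queryResult.row(0).get(0)).contains(dataPath);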