Search in sources :

Example 1 with MoreFutures

use of io.airlift.concurrent.MoreFutures in project trino by trinodb.

In the class TestIcebergSparkCompatibility, method testTrinoSparkConcurrentInsert.

/**
 * Issues INSERTs from Trino and Spark against the same table in lock-step rounds
 * (both workers meet at a barrier before every round) and verifies that the table
 * contains exactly the rows whose INSERT was reported as successful.
 * <p>
 * A failed Trino INSERT is tolerated and simply not recorded; a failed Spark INSERT
 * is not caught and therefore fails the worker.
 *
 * @see TestIcebergInsert#testIcebergConcurrentInsert()
 */
@Test(groups = { ICEBERG, PROFILE_SPECIFIC_TESTS }, timeOut = 60_000)
public void testTrinoSparkConcurrentInsert() throws Exception {
    int insertsPerEngine = 7;
    String baseTableName = "trino_spark_insert_concurrent_" + randomTableSuffix();
    String trinoTableName = trinoTableName(baseTableName);
    String sparkTableName = sparkTableName(baseTableName);
    onTrino().executeQuery("CREATE TABLE " + trinoTableName + "(e varchar, a bigint)");
    ExecutorService executor = Executors.newFixedThreadPool(2);
    try {
        CyclicBarrier barrier = new CyclicBarrier(2);
        QueryExecutor onTrino = onTrino();
        QueryExecutor onSpark = onSpark();

        // One worker per engine; each worker reports the rows it managed to insert.
        List<Callable<List<Row>>> workers = new ArrayList<>();
        for (Engine engine : new Engine[] { Engine.TRINO, Engine.SPARK }) {
            workers.add(() -> {
                List<Row> succeeded = new ArrayList<>();
                for (int round = 0; round < insertsPerEngine; round++) {
                    // Wait for the other engine so that both INSERTs of a round start together
                    barrier.await(20, SECONDS);
                    String engineName = engine.name().toLowerCase(ENGLISH);
                    long value = round;
                    if (engine == Engine.TRINO) {
                        try {
                            onTrino.executeQuery(format("INSERT INTO %s VALUES ('%s', %d)", trinoTableName, engineName, value));
                        } catch (QueryExecutionException ignored) {
                            // Trino INSERT failed: do not record the row, move on to the next round
                            continue;
                        }
                    } else if (engine == Engine.SPARK) {
                        onSpark.executeQuery(format("INSERT INTO %s VALUES ('%s', %d)", sparkTableName, engineName, value));
                    } else {
                        throw new UnsupportedOperationException("Unexpected engine: " + engine);
                    }
                    succeeded.add(row(engineName, value));
                }
                return succeeded;
            });
        }

        // invokeAll returns only after every worker has completed, so the futures are done
        List<Row> allInserted = executor.invokeAll(workers).stream()
                .map(MoreFutures::getDone)
                .flatMap(List::stream)
                .collect(toImmutableList());

        // At least one INSERT per round should succeed
        Assertions.assertThat(allInserted).hasSizeBetween(insertsPerEngine, insertsPerEngine * 2);
        // All Spark inserts should succeed (and not be obliterated)
        assertThat(onTrino().executeQuery("SELECT count(*) FROM " + trinoTableName + " WHERE e = 'spark'")).containsOnly(row(insertsPerEngine));
        // The table holds exactly the rows reported as inserted
        assertThat(onTrino().executeQuery("SELECT * FROM " + trinoTableName)).containsOnly(allInserted);
        onTrino().executeQuery("DROP TABLE " + trinoTableName);
    } finally {
        executor.shutdownNow();
    }
}
Also used : QueryExecutionException(io.trino.tempto.query.QueryExecutionException) QueryExecutor(io.trino.tempto.query.QueryExecutor) ExecutorService(java.util.concurrent.ExecutorService) ArrayList(java.util.ArrayList) ImmutableList(com.google.common.collect.ImmutableList) Arrays.asList(java.util.Arrays.asList) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) List(java.util.List) Row(io.trino.tempto.assertions.QueryAssert.Row) MoreFutures(io.airlift.concurrent.MoreFutures) CyclicBarrier(java.util.concurrent.CyclicBarrier) Test(org.testng.annotations.Test) ProductTest(io.trino.tempto.ProductTest)

Example 2 with MoreFutures

use of io.airlift.concurrent.MoreFutures in project trino by trinodb.

In the class GlueHiveMetastore, method updatePartitionStatisticsBatch.

/**
 * Applies the supplied statistics update functions to the named partitions of {@code table}.
 * <p>
 * For every partition returned by the column statistics provider, the current statistics are
 * passed through the matching update function. The updated basic statistics are written into
 * the partition parameters and sent via asynchronous batch-update requests; the updated column
 * statistics are handed to the column statistics provider.
 *
 * @param table the table owning the partitions
 * @param updates update functions keyed by partition name
 * @throws TrinoException with {@code HIVE_METASTORE_ERROR} when an {@link AmazonServiceException}
 *         is raised while updating column statistics or waiting for the batch updates
 */
private void updatePartitionStatisticsBatch(Table table, Map<String, Function<PartitionStatistics, PartitionStatistics>> updates) {
    // Partition values -> partition name, used to find the update function for a fetched partition
    Map<List<String>, String> partitionValuesToName = updates.keySet().stream().collect(toImmutableMap(HiveUtil::toPartitionValues, identity()));
    List<Partition> partitions = batchGetPartition(table, ImmutableList.copyOf(updates.keySet()));
    Map<Partition, Map<String, HiveColumnStatistics>> statisticsPerPartition = columnStatisticsProvider.getPartitionColumnStatistics(partitions);

    ImmutableList.Builder<BatchUpdatePartitionRequestEntry> partitionUpdateRequests = ImmutableList.builder();
    ImmutableSet.Builder<GlueColumnStatisticsProvider.PartitionStatisticsUpdate> columnStatisticsUpdates = ImmutableSet.builder();
    for (Map.Entry<Partition, Map<String, HiveColumnStatistics>> entry : statisticsPerPartition.entrySet()) {
        Partition existingPartition = entry.getKey();
        Function<PartitionStatistics, PartitionStatistics> update = updates.get(partitionValuesToName.get(existingPartition.getValues()));
        PartitionStatistics updatedStatistics = update.apply(
                new PartitionStatistics(getHiveBasicStatistics(existingPartition.getParameters()), entry.getValue()));

        // Basic statistics are stored in the partition parameters
        Partition updatedPartition = Partition.builder(existingPartition)
                .setParameters(updateStatisticsParameters(existingPartition.getParameters(), updatedStatistics.getBasicStatistics()))
                .build();

        PartitionInput partitionInput = GlueInputConverter.convertPartition(updatedPartition);
        partitionInput.setParameters(updatedPartition.getParameters());
        partitionUpdateRequests.add(new BatchUpdatePartitionRequestEntry()
                .withPartitionValueList(updatedPartition.getValues())
                .withPartitionInput(partitionInput));
        columnStatisticsUpdates.add(new GlueColumnStatisticsProvider.PartitionStatisticsUpdate(updatedPartition, updatedStatistics.getColumnStatistics()));
    }

    // Update basic statistics: one asynchronous request per page of at most BATCH_UPDATE_PARTITION_MAX_PAGE_SIZE entries
    List<Future<BatchUpdatePartitionResult>> pendingBatchUpdates = new ArrayList<>();
    for (List<BatchUpdatePartitionRequestEntry> batch : Lists.partition(partitionUpdateRequests.build(), BATCH_UPDATE_PARTITION_MAX_PAGE_SIZE)) {
        long startTimestamp = System.currentTimeMillis();
        BatchUpdatePartitionRequest request = new BatchUpdatePartitionRequest()
                .withCatalogId(catalogId)
                .withDatabaseName(table.getDatabaseName())
                .withTableName(table.getTableName())
                .withEntries(batch);
        pendingBatchUpdates.add(glueClient.batchUpdatePartitionAsync(request, new StatsRecordingAsyncHandler(stats.getBatchUpdatePartition(), startTimestamp)));
    }

    try {
        // Update column statistics
        columnStatisticsProvider.updatePartitionStatistics(columnStatisticsUpdates.build());
        // Only now wait for the batch updates, so they proceed while column statistics are being updated
        for (Future<BatchUpdatePartitionResult> pending : pendingBatchUpdates) {
            MoreFutures.getFutureValue(pending);
        }
    } catch (AmazonServiceException e) {
        throw new TrinoException(HIVE_METASTORE_ERROR, e);
    }
}
Also used : ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) ImmutableList(com.google.common.collect.ImmutableList) ArrayList(java.util.ArrayList) HiveColumnStatistics(io.trino.plugin.hive.metastore.HiveColumnStatistics) PartitionInput(com.amazonaws.services.glue.model.PartitionInput) BatchUpdatePartitionRequestEntry(com.amazonaws.services.glue.model.BatchUpdatePartitionRequestEntry) ImmutableSet.toImmutableSet(com.google.common.collect.ImmutableSet.toImmutableSet) ImmutableSet(com.google.common.collect.ImmutableSet) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) ArrayList(java.util.ArrayList) PartitionValueList(com.amazonaws.services.glue.model.PartitionValueList) List(java.util.List) ImmutableList(com.google.common.collect.ImmutableList) GlueInputConverter.convertPartition(io.trino.plugin.hive.metastore.glue.converter.GlueInputConverter.convertPartition) Partition(io.trino.plugin.hive.metastore.Partition) BatchUpdatePartitionRequest(com.amazonaws.services.glue.model.BatchUpdatePartitionRequest) PartitionStatistics(io.trino.plugin.hive.PartitionStatistics) AmazonServiceException(com.amazonaws.AmazonServiceException) Future(java.util.concurrent.Future) TrinoException(io.trino.spi.TrinoException) Map(java.util.Map) ImmutableMap.toImmutableMap(com.google.common.collect.ImmutableMap.toImmutableMap) Collectors.toMap(java.util.stream.Collectors.toMap) ImmutableMap(com.google.common.collect.ImmutableMap) MoreFutures(io.airlift.concurrent.MoreFutures)

Example 3 with MoreFutures

use of io.airlift.concurrent.MoreFutures in project trino by trinodb.

In the class TestIcebergInsert, method testIcebergConcurrentInsert.

/**
 * Issues INSERTs into one table from several threads in lock-step rounds (all threads meet
 * at a barrier before every round) and verifies that the table contains exactly the values
 * whose INSERT was reported as successful. Failed INSERTs are tolerated and not recorded.
 *
 * @see TestIcebergCreateTable#testCreateTable() See TestIcebergCreateTable for a non-concurrent INSERT test coverage.
 * @see TestIcebergSparkCompatibility#testTrinoSparkConcurrentInsert()
 */
@Test(groups = { ICEBERG, STORAGE_FORMATS_DETAILED, HMS_ONLY }, timeOut = 60_000)
public void testIcebergConcurrentInsert() throws Exception {
    int threads = 3;
    int insertsPerThread = 7;
    String tableName = "iceberg.default.test_insert_concurrent_" + randomTableSuffix();
    onTrino().executeQuery("CREATE TABLE " + tableName + "(a bigint)");
    ExecutorService executor = Executors.newFixedThreadPool(threads);
    try {
        CyclicBarrier barrier = new CyclicBarrier(threads);
        QueryExecutor onTrino = onTrino();

        // One worker per thread; each worker reports the values it managed to insert.
        List<Callable<List<Long>>> workers = new ArrayList<>();
        for (int index = 0; index < threads; index++) {
            int thread = index;
            workers.add(() -> {
                List<Long> succeeded = new ArrayList<>();
                for (int round = 0; round < insertsPerThread; round++) {
                    // Wait for the other threads so that all INSERTs of a round start together
                    barrier.await(20, SECONDS);
                    // Each thread writes its own disjoint range of values
                    long value = round + (long) insertsPerThread * thread;
                    try {
                        onTrino.executeQuery("INSERT INTO " + tableName + " VALUES " + value);
                    } catch (QueryExecutionException ignored) {
                        // failed to insert: do not record the value, move on to the next round
                        continue;
                    }
                    succeeded.add(value);
                }
                return succeeded;
            });
        }

        // invokeAll returns only after every worker has completed, so the futures are done
        List<Long> allInserted = executor.invokeAll(workers).stream()
                .map(MoreFutures::getDone)
                .flatMap(List::stream)
                .collect(toImmutableList());

        // At least one INSERT per round should succeed
        Assertions.assertThat(allInserted).hasSizeBetween(insertsPerThread, threads * insertsPerThread);
        QueryAssert.Row[] expectedRows = allInserted.stream().map(QueryAssert.Row::row).toArray(QueryAssert.Row[]::new);
        assertThat(onTrino().executeQuery("SELECT * FROM " + tableName)).containsOnly(expectedRows);
        onTrino().executeQuery("DROP TABLE " + tableName);
    } finally {
        executor.shutdownNow();
    }
}
Also used : QueryExecutionException(io.trino.tempto.query.QueryExecutionException) QueryExecutor(io.trino.tempto.query.QueryExecutor) ExecutorService(java.util.concurrent.ExecutorService) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) ArrayList(java.util.ArrayList) List(java.util.List) MoreFutures(io.airlift.concurrent.MoreFutures) CyclicBarrier(java.util.concurrent.CyclicBarrier) ProductTest(io.trino.tempto.ProductTest) Test(org.testng.annotations.Test)

Aggregations

ImmutableList.toImmutableList (com.google.common.collect.ImmutableList.toImmutableList)3 MoreFutures (io.airlift.concurrent.MoreFutures)3 ArrayList (java.util.ArrayList)3 List (java.util.List)3 ImmutableList (com.google.common.collect.ImmutableList)2 ProductTest (io.trino.tempto.ProductTest)2 QueryExecutionException (io.trino.tempto.query.QueryExecutionException)2 QueryExecutor (io.trino.tempto.query.QueryExecutor)2 CyclicBarrier (java.util.concurrent.CyclicBarrier)2 ExecutorService (java.util.concurrent.ExecutorService)2 Test (org.testng.annotations.Test)2 AmazonServiceException (com.amazonaws.AmazonServiceException)1 BatchUpdatePartitionRequest (com.amazonaws.services.glue.model.BatchUpdatePartitionRequest)1 BatchUpdatePartitionRequestEntry (com.amazonaws.services.glue.model.BatchUpdatePartitionRequestEntry)1 PartitionInput (com.amazonaws.services.glue.model.PartitionInput)1 PartitionValueList (com.amazonaws.services.glue.model.PartitionValueList)1 ImmutableMap (com.google.common.collect.ImmutableMap)1 ImmutableMap.toImmutableMap (com.google.common.collect.ImmutableMap.toImmutableMap)1 ImmutableSet (com.google.common.collect.ImmutableSet)1 ImmutableSet.toImmutableSet (com.google.common.collect.ImmutableSet.toImmutableSet)1