Example usage of io.airlift.concurrent.MoreFutures in the Trino project (trinodb):
the testTrinoSparkConcurrentInsert method of the TestIcebergSparkCompatibility class.
/**
 * Races Trino and Spark INSERTs into the same Iceberg table and verifies no committed row is lost.
 *
 * @see TestIcebergInsert#testIcebergConcurrentInsert()
 */
@Test(groups = { ICEBERG, PROFILE_SPECIFIC_TESTS }, timeOut = 60_000)
public void testTrinoSparkConcurrentInsert() throws Exception {
    int insertRounds = 7;
    String baseTableName = "trino_spark_insert_concurrent_" + randomTableSuffix();
    String trinoTableName = trinoTableName(baseTableName);
    String sparkTableName = sparkTableName(baseTableName);
    onTrino().executeQuery("CREATE TABLE " + trinoTableName + "(e varchar, a bigint)");
    ExecutorService executor = Executors.newFixedThreadPool(2);
    try {
        CyclicBarrier roundBarrier = new CyclicBarrier(2);
        QueryExecutor trino = onTrino();
        QueryExecutor spark = onSpark();
        // One callable per engine; each records only the rows it successfully committed
        List<Callable<List<Row>>> workers = Stream.of(Engine.TRINO, Engine.SPARK)
                .map(engine -> (Callable<List<Row>>) () -> {
                    List<Row> committedRows = new ArrayList<>();
                    for (int round = 0; round < insertRounds; round++) {
                        // Both engines start each round together so the inserts actually race
                        roundBarrier.await(20, SECONDS);
                        String engineName = engine.name().toLowerCase(ENGLISH);
                        long value = round;
                        if (engine == Engine.TRINO) {
                            try {
                                trino.executeQuery(format("INSERT INTO %s VALUES ('%s', %d)", trinoTableName, engineName, value));
                            }
                            catch (QueryExecutionException queryExecutionException) {
                                // Trino may lose the commit race this round; don't record the row
                                continue;
                            }
                        }
                        else if (engine == Engine.SPARK) {
                            spark.executeQuery(format("INSERT INTO %s VALUES ('%s', %d)", sparkTableName, engineName, value));
                        }
                        else {
                            throw new UnsupportedOperationException("Unexpected engine: " + engine);
                        }
                        committedRows.add(row(engineName, value));
                    }
                    return committedRows;
                })
                .collect(toImmutableList());
        List<Row> allInserted = executor.invokeAll(workers).stream()
                .map(MoreFutures::getDone)
                .flatMap(List::stream)
                .collect(toImmutableList());
        // At least one engine must win each round
        Assertions.assertThat(allInserted).hasSizeBetween(insertRounds, insertRounds * 2);
        // Spark inserts are expected to always succeed and must not be clobbered by Trino commits
        assertThat(onTrino().executeQuery("SELECT count(*) FROM " + trinoTableName + " WHERE e = 'spark'")).containsOnly(row(insertRounds));
        assertThat(onTrino().executeQuery("SELECT * FROM " + trinoTableName)).containsOnly(allInserted);
        onTrino().executeQuery("DROP TABLE " + trinoTableName);
    }
    finally {
        executor.shutdownNow();
    }
}
Example usage of io.airlift.concurrent.MoreFutures in the Trino project (trinodb):
the updatePartitionStatisticsBatch method of the GlueHiveMetastore class.
// Applies per-partition statistics update functions in bulk: basic statistics go out as
// paged async Glue BatchUpdatePartition calls, column statistics through the
// columnStatisticsProvider; the async calls are only awaited after the column update.
private void updatePartitionStatisticsBatch(Table table, Map<String, Function<PartitionStatistics, PartitionStatistics>> updates) {
    ImmutableList.Builder<BatchUpdatePartitionRequestEntry> basicStatisticsEntries = ImmutableList.builder();
    ImmutableSet.Builder<GlueColumnStatisticsProvider.PartitionStatisticsUpdate> columnStatisticsEntries = ImmutableSet.builder();
    // Map partition values back to the partition name keys used in `updates`
    Map<List<String>, String> partitionNamesByValues = updates.keySet().stream().collect(toImmutableMap(HiveUtil::toPartitionValues, identity()));
    List<Partition> partitions = batchGetPartition(table, ImmutableList.copyOf(updates.keySet()));
    Map<Partition, Map<String, HiveColumnStatistics>> currentColumnStatistics = columnStatisticsProvider.getPartitionColumnStatistics(partitions);
    for (Map.Entry<Partition, Map<String, HiveColumnStatistics>> entry : currentColumnStatistics.entrySet()) {
        Partition partition = entry.getKey();
        Function<PartitionStatistics, PartitionStatistics> update = updates.get(partitionNamesByValues.get(partition.getValues()));
        PartitionStatistics currentStatistics = new PartitionStatistics(getHiveBasicStatistics(partition.getParameters()), entry.getValue());
        PartitionStatistics updatedStatistics = update.apply(currentStatistics);
        // Fold the updated basic statistics into the partition parameters
        Map<String, String> newParameters = updateStatisticsParameters(partition.getParameters(), updatedStatistics.getBasicStatistics());
        Partition updatedPartition = Partition.builder(partition).setParameters(newParameters).build();
        PartitionInput partitionInput = GlueInputConverter.convertPartition(updatedPartition);
        partitionInput.setParameters(updatedPartition.getParameters());
        basicStatisticsEntries.add(new BatchUpdatePartitionRequestEntry()
                .withPartitionValueList(updatedPartition.getValues())
                .withPartitionInput(partitionInput));
        columnStatisticsEntries.add(new GlueColumnStatisticsProvider.PartitionStatisticsUpdate(updatedPartition, updatedStatistics.getColumnStatistics()));
    }
    List<List<BatchUpdatePartitionRequestEntry>> requestPages = Lists.partition(basicStatisticsEntries.build(), BATCH_UPDATE_PARTITION_MAX_PAGE_SIZE);
    List<Future<BatchUpdatePartitionResult>> pendingBatchUpdates = new ArrayList<>();
    for (List<BatchUpdatePartitionRequestEntry> requestPage : requestPages) {
        // Fire off the basic-statistics update for this page asynchronously
        long startTimestamp = System.currentTimeMillis();
        pendingBatchUpdates.add(glueClient.batchUpdatePartitionAsync(
                new BatchUpdatePartitionRequest()
                        .withCatalogId(catalogId)
                        .withDatabaseName(table.getDatabaseName())
                        .withTableName(table.getTableName())
                        .withEntries(requestPage),
                new StatsRecordingAsyncHandler(stats.getBatchUpdatePartition(), startTimestamp)));
    }
    try {
        // Update column statistics while the async basic-statistics calls are in flight
        columnStatisticsProvider.updatePartitionStatistics(columnStatisticsEntries.build());
        // Only now wait for the async batch updates to finish
        pendingBatchUpdates.forEach(MoreFutures::getFutureValue);
    }
    catch (AmazonServiceException e) {
        throw new TrinoException(HIVE_METASTORE_ERROR, e);
    }
}
Example usage of io.airlift.concurrent.MoreFutures in the Trino project (trinodb):
the testIcebergConcurrentInsert method of the TestIcebergInsert class.
/**
 * Races several Trino threads inserting into one Iceberg table and verifies that
 * every successfully committed value is visible and no extra rows appear.
 *
 * @see TestIcebergCreateTable#testCreateTable() See TestIcebergCreateTable for a non-concurrent INSERT test coverage.
 * @see TestIcebergSparkCompatibility#testTrinoSparkConcurrentInsert()
 */
@Test(groups = { ICEBERG, STORAGE_FORMATS_DETAILED, HMS_ONLY }, timeOut = 60_000)
public void testIcebergConcurrentInsert() throws Exception {
    int threads = 3;
    int insertsPerThread = 7;
    String tableName = "iceberg.default.test_insert_concurrent_" + randomTableSuffix();
    onTrino().executeQuery("CREATE TABLE " + tableName + "(a bigint)");
    ExecutorService executor = Executors.newFixedThreadPool(threads);
    try {
        CyclicBarrier roundBarrier = new CyclicBarrier(threads);
        QueryExecutor trino = onTrino();
        // One callable per thread; each records only the values it successfully committed
        List<Callable<List<Long>>> workers = IntStream.range(0, threads)
                .mapToObj(thread -> (Callable<List<Long>>) () -> {
                    List<Long> committedValues = new ArrayList<>();
                    for (int round = 0; round < insertsPerThread; round++) {
                        // Line all threads up so the inserts in a round actually race
                        roundBarrier.await(20, SECONDS);
                        // Disjoint value ranges per thread keep committed rows distinguishable
                        long value = round + (long) insertsPerThread * thread;
                        try {
                            trino.executeQuery("INSERT INTO " + tableName + " VALUES " + value);
                            committedValues.add(value);
                        }
                        catch (QueryExecutionException queryExecutionException) {
                            // this thread lost the commit race for this round
                        }
                    }
                    return committedValues;
                })
                .collect(toImmutableList());
        List<Long> allInserted = executor.invokeAll(workers).stream()
                .map(MoreFutures::getDone)
                .flatMap(List::stream)
                .collect(toImmutableList());
        // Every round must have at least one winning thread
        Assertions.assertThat(allInserted).hasSizeBetween(insertsPerThread, threads * insertsPerThread);
        assertThat(onTrino().executeQuery("SELECT * FROM " + tableName)).containsOnly(allInserted.stream().map(QueryAssert.Row::row).toArray(QueryAssert.Row[]::new));
        onTrino().executeQuery("DROP TABLE " + tableName);
    }
    finally {
        executor.shutdownNow();
    }
}
Aggregations