Use of com.facebook.presto.hive.metastore.PartitionWithStatistics in project presto by prestodb.
The class FileHiveMetastore, method addPartitions.
@Override
public synchronized void addPartitions(MetastoreContext metastoreContext, String databaseName, String tableName, List<PartitionWithStatistics> partitions) {
    requireNonNull(databaseName, "databaseName is null");
    requireNonNull(tableName, "tableName is null");
    requireNonNull(partitions, "partitions is null");
    Table table = getRequiredTable(metastoreContext, databaseName, tableName);
    checkArgument(EnumSet.of(MANAGED_TABLE, EXTERNAL_TABLE, MATERIALIZED_VIEW).contains(table.getTableType()), "Invalid table type: %s", table.getTableType());
    try {
        // Phase 1: validate each partition and serialize its metadata up front,
        // so nothing is written if any partition is invalid or already exists.
        Map<Path, byte[]> schemaFiles = new LinkedHashMap<>();
        for (PartitionWithStatistics partitionWithStatistics : partitions) {
            Partition partition = partitionWithStatistics.getPartition();
            verifiedPartition(table, partition);
            Path partitionMetadataDirectory = getPartitionMetadataDirectory(table, partition.getValues());
            Path schemaPath = new Path(partitionMetadataDirectory, PRESTO_SCHEMA_FILE_NAME);
            if (metadataFileSystem.exists(schemaPath)) {
                throw new PrestoException(HIVE_METASTORE_ERROR, "Partition already exists");
            }
            byte[] schemaJson = partitionCodec.toJsonBytes(new PartitionMetadata(table, partitionWithStatistics));
            schemaFiles.put(schemaPath, schemaJson);
        }
        // Phase 2: write the schema files, deleting any already-created files if a later write fails.
        Set<Path> createdFiles = new LinkedHashSet<>();
        try {
            for (Entry<Path, byte[]> entry : schemaFiles.entrySet()) {
                try (OutputStream outputStream = metadataFileSystem.create(entry.getKey())) {
                    createdFiles.add(entry.getKey());
                    outputStream.write(entry.getValue());
                } catch (IOException e) {
                    throw new PrestoException(HIVE_METASTORE_ERROR, "Could not write partition schema", e);
                }
            }
        } catch (Throwable e) {
            // Roll back: best-effort cleanup of partially written schema files.
            for (Path createdFile : createdFiles) {
                try {
                    metadataFileSystem.delete(createdFile, false);
                } catch (IOException ignored) {
                }
            }
            throw e;
        }
    } catch (IOException e) {
        throw new PrestoException(HIVE_METASTORE_ERROR, e);
    }
}
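The caller-side contract is the same across the metastore implementations shown on this page; below is a minimal sketch of registering a single partition through an ExtendedHiveMetastore. The database and table names, the partition name, and the helper class are illustrative assumptions, not code from the project.

import com.facebook.presto.hive.metastore.ExtendedHiveMetastore;
import com.facebook.presto.hive.metastore.MetastoreContext;
import com.facebook.presto.hive.metastore.Partition;
import com.facebook.presto.hive.metastore.PartitionStatistics;
import com.facebook.presto.hive.metastore.PartitionWithStatistics;
import com.google.common.collect.ImmutableList;

class AddPartitionSketch {
    // Registers one partition with no statistics; names are illustrative only.
    static void addOne(ExtendedHiveMetastore metastore, MetastoreContext context, Partition partition) {
        // PartitionWithStatistics pairs the Partition with its Hive-style name
        // ("ds=2020-07-01") and its statistics.
        PartitionWithStatistics withStats = new PartitionWithStatistics(partition, "ds=2020-07-01", PartitionStatistics.empty());
        metastore.addPartitions(context, "test_db", "test_table", ImmutableList.of(withStats));
    }
}

Passing PartitionStatistics.empty() registers the partition without statistics, which, as the planner test at the end of this page shows, disables statistics-based metadata rewrites for that partition.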
Use of com.facebook.presto.hive.metastore.PartitionWithStatistics in project presto by prestodb.
The class InMemoryHiveMetastore, method addPartitions.
@Override
public synchronized void addPartitions(MetastoreContext metastoreContext, String databaseName, String tableName, List<PartitionWithStatistics> partitionsWithStatistics) {
    for (PartitionWithStatistics partitionWithStatistics : partitionsWithStatistics) {
        PartitionName partitionKey = PartitionName.partition(databaseName, tableName, partitionWithStatistics.getPartitionName());
        // The partition and its statistics live in separate maps, both keyed
        // by the same (database, table, partition name) composite key.
        partitions.put(partitionKey, partitionWithStatistics.getPartition());
        partitionColumnStatistics.put(partitionKey, partitionWithStatistics.getStatistics());
    }
}
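PartitionName here is an internal key class of InMemoryHiveMetastore; a minimal sketch of what such a composite key could look like follows, assuming it is a plain value object over (database, table, partition name). The field layout is an assumption based only on the usage above.

import java.util.Objects;

// Hypothetical value key pairing a partition name with its table; illustrative only.
final class PartitionName {
    private final String databaseName;
    private final String tableName;
    private final String partitionName;

    private PartitionName(String databaseName, String tableName, String partitionName) {
        this.databaseName = databaseName;
        this.tableName = tableName;
        this.partitionName = partitionName;
    }

    // Mirrors the PartitionName.partition(...) factory used above.
    static PartitionName partition(String databaseName, String tableName, String partitionName) {
        return new PartitionName(databaseName, tableName, partitionName);
    }

    @Override
    public boolean equals(Object o) {
        if (!(o instanceof PartitionName)) {
            return false;
        }
        PartitionName other = (PartitionName) o;
        return databaseName.equals(other.databaseName)
                && tableName.equals(other.tableName)
                && partitionName.equals(other.partitionName);
    }

    @Override
    public int hashCode() {
        return Objects.hash(databaseName, tableName, partitionName);
    }
}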
Use of com.facebook.presto.hive.metastore.PartitionWithStatistics in project presto by prestodb.
The class GlueHiveMetastore, method addPartitions.
@Override
public void addPartitions(MetastoreContext metastoreContext, String databaseName, String tableName, List<PartitionWithStatistics> partitions) {
    try {
        // Issue one asynchronous BatchCreatePartition request per page of partitions.
        List<Future<BatchCreatePartitionResult>> futures = new ArrayList<>();
        for (List<PartitionWithStatistics> partitionBatch : Lists.partition(partitions, BATCH_CREATE_PARTITION_MAX_PAGE_SIZE)) {
            List<PartitionInput> partitionInputs = mappedCopy(partitionBatch, GlueInputConverter::convertPartition);
            futures.add(glueClient.batchCreatePartitionAsync(
                    new BatchCreatePartitionRequest()
                            .withCatalogId(catalogId)
                            .withDatabaseName(databaseName)
                            .withTableName(tableName)
                            .withPartitionInputList(partitionInputs),
                    stats.getBatchCreatePartitions().metricsAsyncHandler()));
        }
        // Wait for all batches and surface any per-partition errors Glue reports.
        for (Future<BatchCreatePartitionResult> future : futures) {
            BatchCreatePartitionResult result = future.get();
            propagatePartitionErrorToPrestoException(databaseName, tableName, result.getErrors());
        }
    } catch (AmazonServiceException | InterruptedException | ExecutionException e) {
        if (e instanceof InterruptedException) {
            Thread.currentThread().interrupt();
        }
        throw new PrestoException(HIVE_METASTORE_ERROR, e);
    }
}
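The paging above comes from Guava's Lists.partition, which yields consecutive sublists of at most the given size; a small standalone illustration follows, with 100 as an assumed stand-in for the BATCH_CREATE_PARTITION_MAX_PAGE_SIZE constant (whose actual value is not shown in this snippet).

import com.google.common.collect.Lists;
import java.util.List;
import java.util.stream.Collectors;
import java.util.stream.IntStream;

class BatchingSketch {
    public static void main(String[] args) {
        int maxPageSize = 100; // illustrative stand-in for BATCH_CREATE_PARTITION_MAX_PAGE_SIZE
        List<Integer> partitions = IntStream.rangeClosed(1, 250).boxed().collect(Collectors.toList());
        // Lists.partition returns consecutive sublists: here of sizes 100, 100, 50.
        for (List<Integer> batch : Lists.partition(partitions, maxPageSize)) {
            System.out.println("batch size = " + batch.size());
        }
    }
}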
Use of com.facebook.presto.hive.metastore.PartitionWithStatistics in project presto by prestodb.
The class TestHiveSplitManager, method assertRedundantColumnDomains.
private void assertRedundantColumnDomains(Range predicateRange, PartitionStatistics partitionStatistics, List<Set<ColumnHandle>> expectedRedundantColumnDomains, HiveColumnHandle columnHandle) throws Exception {
    // Prepare query predicate tuple domain
    TupleDomain<ColumnHandle> queryTupleDomain = TupleDomain.fromColumnDomains(Optional.of(ImmutableList.of(
            new ColumnDomain<>(columnHandle, Domain.create(SortedRangeSet.copyOf(predicateRange.getType(), ImmutableList.of(predicateRange)), false)))));
    // Prepare partition with stats
    PartitionWithStatistics partitionWithStatistics = new PartitionWithStatistics(
            new Partition("test_db", "test_table", ImmutableList.of(PARTITION_VALUE), new Storage(fromHiveStorageFormat(ORC), "location", Optional.empty(), true, ImmutableMap.of(), ImmutableMap.of()), COLUMNS, ImmutableMap.of(), Optional.empty(), false, true, 0),
            PARTITION_NAME, partitionStatistics);
    HiveClientConfig hiveClientConfig = new HiveClientConfig().setPartitionStatisticsBasedOptimizationEnabled(true);
    HdfsEnvironment hdfsEnvironment = new HdfsEnvironment(new HiveHdfsConfiguration(new HdfsConfigurationInitializer(hiveClientConfig, new MetastoreClientConfig()), ImmutableSet.of()), new MetastoreClientConfig(), new NoHdfsAuthentication());
    HiveMetadataFactory metadataFactory = new HiveMetadataFactory(
            new TestingExtendedHiveMetastore(TEST_TABLE, partitionWithStatistics), hdfsEnvironment, new HivePartitionManager(FUNCTION_AND_TYPE_MANAGER, hiveClientConfig), DateTimeZone.forOffsetHours(1),
            true, false, false, false, true, true, hiveClientConfig.getMaxPartitionBatchSize(), hiveClientConfig.getMaxPartitionsPerScan(), false,
            FUNCTION_AND_TYPE_MANAGER, new HiveLocationService(hdfsEnvironment), FUNCTION_RESOLUTION, ROW_EXPRESSION_SERVICE, FILTER_STATS_CALCULATOR_SERVICE, new TableParameterCodec(),
            HiveTestUtils.PARTITION_UPDATE_CODEC, HiveTestUtils.PARTITION_UPDATE_SMILE_CODEC, executor, new HiveTypeTranslator(),
            new HiveStagingFileCommitter(hdfsEnvironment, executor), new HiveZeroRowFileCreator(hdfsEnvironment, new OutputStreamDataSinkFactory(), executor), TEST_SERVER_VERSION,
            new HivePartitionObjectBuilder(), new HiveEncryptionInformationProvider(ImmutableList.of()), new HivePartitionStats(), new HiveFileRenamer(), HiveColumnConverterProvider.DEFAULT_COLUMN_CONVERTER_PROVIDER);
    HiveSplitManager splitManager = new HiveSplitManager(
            new TestingHiveTransactionManager(metadataFactory), new NamenodeStats(), hdfsEnvironment, new TestingDirectoryLister(), directExecutor(), new HiveCoercionPolicy(FUNCTION_AND_TYPE_MANAGER),
            new CounterStat(), 100, hiveClientConfig.getMaxOutstandingSplitsSize(), hiveClientConfig.getMinPartitionBatchSize(), hiveClientConfig.getMaxPartitionBatchSize(),
            hiveClientConfig.getSplitLoaderConcurrency(), false, new ConfigBasedCacheQuotaRequirementProvider(new CacheConfig()), new HiveEncryptionInformationProvider(ImmutableList.of()));
    HiveColumnHandle partitionColumn = new HiveColumnHandle("ds", HIVE_STRING, parseTypeSignature(VARCHAR), MAX_PARTITION_KEY_COLUMN_INDEX, PARTITION_KEY, Optional.empty(), Optional.empty());
    List<HivePartition> partitions = ImmutableList.of(new HivePartition(new SchemaTableName("test_schema", "test_table"), PARTITION_NAME, ImmutableMap.of(partitionColumn, NullableValue.of(createUnboundedVarcharType(), utf8Slice(PARTITION_VALUE)))));
    TupleDomain<Subfield> domainPredicate = queryTupleDomain.transform(HiveColumnHandle.class::cast).transform(column -> new Subfield(column.getName(), ImmutableList.of()));
    ConnectorSplitSource splitSource = splitManager.getSplits(
            new HiveTransactionHandle(),
            new TestingConnectorSession(new HiveSessionProperties(hiveClientConfig, new OrcFileWriterConfig(), new ParquetFileWriterConfig(), new CacheConfig()).getSessionProperties()),
            new HiveTableLayoutHandle(new SchemaTableName("test_schema", "test_table"), "test_path", ImmutableList.of(partitionColumn), COLUMNS, ImmutableMap.of(), partitions, domainPredicate, TRUE_CONSTANT, ImmutableMap.of(partitionColumn.getName(), partitionColumn, columnHandle.getName(), columnHandle), queryTupleDomain, Optional.empty(), Optional.empty(), false, "layout", Optional.empty(), false),
            SPLIT_SCHEDULING_CONTEXT);
    List<Set<ColumnHandle>> actualRedundantColumnDomains = splitSource.getNextBatch(NOT_PARTITIONED, 100).get().getSplits().stream()
            .map(HiveSplit.class::cast).map(HiveSplit::getRedundantColumnDomains).collect(toImmutableList());
    assertEquals(actualRedundantColumnDomains, expectedRedundantColumnDomains);
}
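The predicate construction on the first lines of the method is dense; below is the same pattern in isolation for a bigint column. The class name, the literal 42, and the presto-common package layout are assumptions for illustration.

import static com.facebook.presto.common.type.BigintType.BIGINT;

import com.facebook.presto.common.predicate.Domain;
import com.facebook.presto.common.predicate.Range;
import com.facebook.presto.common.predicate.SortedRangeSet;
import com.facebook.presto.common.predicate.TupleDomain;
import com.facebook.presto.common.predicate.TupleDomain.ColumnDomain;
import com.facebook.presto.spi.ColumnHandle;
import com.google.common.collect.ImmutableList;
import java.util.Optional;

class PredicateSketch {
    static TupleDomain<ColumnHandle> equalsPredicate(ColumnHandle columnHandle) {
        // One Range (here: column = 42) becomes a Domain; "false" means NULL is excluded.
        Domain domain = Domain.create(SortedRangeSet.copyOf(BIGINT, ImmutableList.of(Range.equal(BIGINT, 42L))), false);
        // The Domain is attached to a column handle and wrapped into a TupleDomain.
        return TupleDomain.fromColumnDomains(Optional.of(ImmutableList.of(new ColumnDomain<>(columnHandle, domain))));
    }
}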
Use of com.facebook.presto.hive.metastore.PartitionWithStatistics in project presto by prestodb.
The class TestHiveLogicalPlanner, method testMetadataAggregationFoldingWithEmptyPartitions.
@Test
public void testMetadataAggregationFoldingWithEmptyPartitions() {
    QueryRunner queryRunner = getQueryRunner();
    Session optimizeMetadataQueries = Session.builder(this.getQueryRunner().getDefaultSession())
            .setSystemProperty(OPTIMIZE_METADATA_QUERIES, Boolean.toString(true))
            .build();
    Session optimizeMetadataQueriesIgnoreStats = Session.builder(this.getQueryRunner().getDefaultSession())
            .setSystemProperty(OPTIMIZE_METADATA_QUERIES_IGNORE_STATS, Boolean.toString(true))
            .build();
    Session shufflePartitionColumns = Session.builder(this.getQueryRunner().getDefaultSession())
            .setCatalogSessionProperty(HIVE_CATALOG, SHUFFLE_PARTITIONED_COLUMNS_FOR_TABLE_WRITE, Boolean.toString(true))
            .build();
    queryRunner.execute(shufflePartitionColumns,
            "CREATE TABLE test_metadata_aggregation_folding_with_empty_partitions WITH (partitioned_by = ARRAY['ds']) AS " +
                    "SELECT orderkey, CAST(to_iso8601(date_add('DAY', orderkey % 2, date('2020-07-01'))) AS VARCHAR) AS ds FROM orders WHERE orderkey < 1000");
    ExtendedHiveMetastore metastore = replicateHiveMetastore((DistributedQueryRunner) queryRunner);
    MetastoreContext metastoreContext = new MetastoreContext(getSession().getUser(), getSession().getQueryId().getId(), Optional.empty(), Optional.empty(), Optional.empty(), false, HiveColumnConverterProvider.DEFAULT_COLUMN_CONVERTER_PROVIDER);
    Table table = metastore.getTable(metastoreContext, getSession().getSchema().get(), "test_metadata_aggregation_folding_with_empty_partitions").get();
    // Add one partition with no statistics.
    String partitionNameNoStats = "ds=2020-07-20";
    Partition partitionNoStats = createDummyPartition(table, partitionNameNoStats);
    metastore.addPartitions(metastoreContext, table.getDatabaseName(), table.getTableName(),
            ImmutableList.of(new PartitionWithStatistics(partitionNoStats, partitionNameNoStats, PartitionStatistics.empty())));
    // Add one partition with statistics indicating that it has no rows.
    String emptyPartitionName = "ds=2020-06-30";
    Partition emptyPartition = createDummyPartition(table, emptyPartitionName);
    metastore.addPartitions(metastoreContext, table.getDatabaseName(), table.getTableName(),
            ImmutableList.of(new PartitionWithStatistics(emptyPartition, emptyPartitionName, PartitionStatistics.builder().setBasicStatistics(new HiveBasicStatistics(1, 0, 0, 0)).build())));
    try {
        // Max ds doesn't have stats. Disable rewrite.
        assertPlan(optimizeMetadataQueries,
                "SELECT * FROM test_metadata_aggregation_folding_with_empty_partitions WHERE ds = (SELECT max(ds) from test_metadata_aggregation_folding_with_empty_partitions)",
                anyTree(anyTree(PlanMatchPattern.tableScan("test_metadata_aggregation_folding_with_empty_partitions")), anyTree(PlanMatchPattern.tableScan("test_metadata_aggregation_folding_with_empty_partitions"))));
        // Ignore metastore stats. Enable rewrite.
        assertPlan(optimizeMetadataQueriesIgnoreStats,
                "SELECT * FROM test_metadata_aggregation_folding_with_empty_partitions WHERE ds = (SELECT max(ds) from test_metadata_aggregation_folding_with_empty_partitions)",
                anyTree(join(INNER, ImmutableList.of(), tableScan("test_metadata_aggregation_folding_with_empty_partitions", getSingleValueColumnDomain("ds", "2020-07-20"), TRUE_CONSTANT, ImmutableSet.of("ds")), anyTree(any()))));
        // Max ds matching the filter has stats. Enable rewrite.
        assertPlan(optimizeMetadataQueries,
                "SELECT * FROM test_metadata_aggregation_folding_with_empty_partitions WHERE ds = (SELECT max(ds) from test_metadata_aggregation_folding_with_empty_partitions WHERE ds <= '2020-07-02')",
                anyTree(join(INNER, ImmutableList.of(), tableScan("test_metadata_aggregation_folding_with_empty_partitions", getSingleValueColumnDomain("ds", "2020-07-02"), TRUE_CONSTANT, ImmutableSet.of("ds")), anyTree(any()))));
        // Min ds partition stats indicate that it is an empty partition. Disable rewrite.
        assertPlan(optimizeMetadataQueries,
                "SELECT * FROM test_metadata_aggregation_folding_with_empty_partitions WHERE ds = (SELECT min(ds) from test_metadata_aggregation_folding_with_empty_partitions)",
                anyTree(anyTree(PlanMatchPattern.tableScan("test_metadata_aggregation_folding_with_empty_partitions")), anyTree(PlanMatchPattern.tableScan("test_metadata_aggregation_folding_with_empty_partitions"))));
        // Min ds partition matching the filter has non-empty stats. Enable rewrite.
        assertPlan(optimizeMetadataQueries,
                "SELECT * FROM test_metadata_aggregation_folding_with_empty_partitions WHERE ds = (SELECT min(ds) from test_metadata_aggregation_folding_with_empty_partitions WHERE ds >= '2020-07-01')",
                anyTree(join(INNER, ImmutableList.of(), tableScan("test_metadata_aggregation_folding_with_empty_partitions", getSingleValueColumnDomain("ds", "2020-07-01"), TRUE_CONSTANT, ImmutableSet.of("ds")), anyTree(any()))));
        // Test the non-reducible code path.
        // Disable rewrite as there are partitions with empty stats.
        assertPlan(optimizeMetadataQueries,
                "SELECT DISTINCT ds FROM test_metadata_aggregation_folding_with_empty_partitions",
                anyTree(tableScanWithConstraint("test_metadata_aggregation_folding_with_empty_partitions", ImmutableMap.of("ds", multipleValues(VARCHAR, utf8Slices("2020-06-30", "2020-07-01", "2020-07-02", "2020-07-20"))))));
        // Enable rewrite as all matching partitions have stats.
        assertPlan(optimizeMetadataQueries,
                "SELECT DISTINCT ds FROM test_metadata_aggregation_folding_with_empty_partitions WHERE ds BETWEEN '2020-07-01' AND '2020-07-03'",
                anyTree(values(ImmutableList.of("ds"), ImmutableList.of(ImmutableList.of(new StringLiteral("2020-07-01")), ImmutableList.of(new StringLiteral("2020-07-02"))))));
        // One of two resulting partitions doesn't have stats. Disable rewrite.
        assertPlan(optimizeMetadataQueries,
                "SELECT MIN(ds), MAX(ds) FROM test_metadata_aggregation_folding_with_empty_partitions WHERE ds BETWEEN '2020-06-30' AND '2020-07-03'",
                anyTree(tableScanWithConstraint("test_metadata_aggregation_folding_with_empty_partitions", ImmutableMap.of("ds", multipleValues(VARCHAR, utf8Slices("2020-06-30", "2020-07-01", "2020-07-02"))))));
        // Ignore metastore stats. Always enable rewrite.
        assertPlan(optimizeMetadataQueriesIgnoreStats,
                "SELECT MIN(ds), MAX(ds) FROM test_metadata_aggregation_folding_with_empty_partitions WHERE ds BETWEEN '2020-06-30' AND '2020-07-03'",
                anyTree(project(ImmutableMap.of("min", expression("'2020-06-30'"), "max", expression("'2020-07-02'")), anyTree(values()))));
        // Both resulting partitions have stats. Enable rewrite.
        assertPlan(optimizeMetadataQueries,
                "SELECT MIN(ds), MAX(ds) FROM test_metadata_aggregation_folding_with_empty_partitions WHERE ds BETWEEN '2020-07-01' AND '2020-07-03'",
                anyTree(project(ImmutableMap.of("min", expression("'2020-07-01'"), "max", expression("'2020-07-02'")), anyTree(values()))));
    } finally {
        queryRunner.execute("DROP TABLE IF EXISTS test_metadata_aggregation_folding_with_empty_partitions");
    }
}
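The two statistics states that drive this test differ only in their basic statistics; a minimal side-by-side sketch follows. The HiveBasicStatistics argument order (file count, row count, in-memory bytes, on-disk bytes) is an assumption inferred from this usage.

import com.facebook.presto.hive.HiveBasicStatistics;
import com.facebook.presto.hive.metastore.PartitionStatistics;

class StatsStates {
    // No statistics at all: the metadata-only rewrite cannot trust the partition,
    // so queries like "SELECT max(ds) ..." fall back to scanning the table.
    static final PartitionStatistics NO_STATS = PartitionStatistics.empty();

    // Statistics present but reporting zero rows: the partition is known-empty,
    // which likewise disables min/max folding over it.
    // Assumed argument order: fileCount, rowCount, inMemoryDataSize, onDiskDataSize.
    static final PartitionStatistics EMPTY_PARTITION = PartitionStatistics.builder()
            .setBasicStatistics(new HiveBasicStatistics(1, 0, 0, 0))
            .build();
}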