Search in sources :

Example 6 with PartitionWithStatistics

use of com.facebook.presto.hive.metastore.PartitionWithStatistics in project presto by prestodb.

the class FileHiveMetastore method addPartitions.

@Override
public synchronized void addPartitions(MetastoreContext metastoreContext, String databaseName, String tableName, List<PartitionWithStatistics> partitions) {
    requireNonNull(databaseName, "databaseName is null");
    requireNonNull(tableName, "tableName is null");
    requireNonNull(partitions, "partitions is null");
    Table table = getRequiredTable(metastoreContext, databaseName, tableName);
    checkArgument(EnumSet.of(MANAGED_TABLE, EXTERNAL_TABLE, MATERIALIZED_VIEW).contains(table.getTableType()), "Invalid table type: %s", table.getTableType());
    try {
        Map<Path, byte[]> schemaFiles = new LinkedHashMap<>();
        for (PartitionWithStatistics partitionWithStatistics : partitions) {
            Partition partition = partitionWithStatistics.getPartition();
            verifiedPartition(table, partition);
            Path partitionMetadataDirectory = getPartitionMetadataDirectory(table, partition.getValues());
            Path schemaPath = new Path(partitionMetadataDirectory, PRESTO_SCHEMA_FILE_NAME);
            if (metadataFileSystem.exists(schemaPath)) {
                throw new PrestoException(HIVE_METASTORE_ERROR, "Partition already exists");
            }
            byte[] schemaJson = partitionCodec.toJsonBytes(new PartitionMetadata(table, partitionWithStatistics));
            schemaFiles.put(schemaPath, schemaJson);
        }
        Set<Path> createdFiles = new LinkedHashSet<>();
        try {
            for (Entry<Path, byte[]> entry : schemaFiles.entrySet()) {
                try (OutputStream outputStream = metadataFileSystem.create(entry.getKey())) {
                    createdFiles.add(entry.getKey());
                    outputStream.write(entry.getValue());
                } catch (IOException e) {
                    throw new PrestoException(HIVE_METASTORE_ERROR, "Could not write partition schema", e);
                }
            }
        } catch (Throwable e) {
            for (Path createdFile : createdFiles) {
                try {
                    metadataFileSystem.delete(createdFile, false);
                } catch (IOException ignored) {
                }
            }
            throw e;
        }
    } catch (IOException e) {
        throw new PrestoException(HIVE_METASTORE_ERROR, e);
    }
}
Also used : Path(org.apache.hadoop.fs.Path) LinkedHashSet(java.util.LinkedHashSet) Partition(com.facebook.presto.hive.metastore.Partition) Table(com.facebook.presto.hive.metastore.Table) OutputStream(java.io.OutputStream) PrestoException(com.facebook.presto.spi.PrestoException) IOException(java.io.IOException) LinkedHashMap(java.util.LinkedHashMap) PartitionWithStatistics(com.facebook.presto.hive.metastore.PartitionWithStatistics)

Example 7 with PartitionWithStatistics

use of com.facebook.presto.hive.metastore.PartitionWithStatistics in project presto by prestodb.

the class InMemoryHiveMetastore method addPartitions.

@Override
public synchronized void addPartitions(MetastoreContext metastoreContext, String databaseName, String tableName, List<PartitionWithStatistics> partitionsWithStatistics) {
    for (PartitionWithStatistics partitionWithStatistics : partitionsWithStatistics) {
        PartitionName partitionKey = PartitionName.partition(databaseName, tableName, partitionWithStatistics.getPartitionName());
        partitions.put(partitionKey, partitionWithStatistics.getPartition());
        partitionColumnStatistics.put(partitionKey, partitionWithStatistics.getStatistics());
    }
}
Also used : PartitionWithStatistics(com.facebook.presto.hive.metastore.PartitionWithStatistics)

Example 8 with PartitionWithStatistics

use of com.facebook.presto.hive.metastore.PartitionWithStatistics in project presto by prestodb.

the class GlueHiveMetastore method addPartitions.

@Override
public void addPartitions(MetastoreContext metastoreContext, String databaseName, String tableName, List<PartitionWithStatistics> partitions) {
    try {
        List<Future<BatchCreatePartitionResult>> futures = new ArrayList<>();
        for (List<PartitionWithStatistics> partitionBatch : Lists.partition(partitions, BATCH_CREATE_PARTITION_MAX_PAGE_SIZE)) {
            List<PartitionInput> partitionInputs = mappedCopy(partitionBatch, GlueInputConverter::convertPartition);
            futures.add(glueClient.batchCreatePartitionAsync(new BatchCreatePartitionRequest().withCatalogId(catalogId).withDatabaseName(databaseName).withTableName(tableName).withPartitionInputList(partitionInputs), stats.getBatchCreatePartitions().metricsAsyncHandler()));
        }
        for (Future<BatchCreatePartitionResult> future : futures) {
            BatchCreatePartitionResult result = future.get();
            propagatePartitionErrorToPrestoException(databaseName, tableName, result.getErrors());
        }
    } catch (AmazonServiceException | InterruptedException | ExecutionException e) {
        if (e instanceof InterruptedException) {
            Thread.currentThread().interrupt();
        }
        throw new PrestoException(HIVE_METASTORE_ERROR, e);
    }
}
Also used : ArrayList(java.util.ArrayList) PrestoException(com.facebook.presto.spi.PrestoException) PartitionInput(com.amazonaws.services.glue.model.PartitionInput) GlueInputConverter(com.facebook.presto.hive.metastore.glue.converter.GlueInputConverter) BatchCreatePartitionRequest(com.amazonaws.services.glue.model.BatchCreatePartitionRequest) PartitionWithStatistics(com.facebook.presto.hive.metastore.PartitionWithStatistics) BatchCreatePartitionResult(com.amazonaws.services.glue.model.BatchCreatePartitionResult) AmazonServiceException(com.amazonaws.AmazonServiceException) Future(java.util.concurrent.Future) ExecutionException(java.util.concurrent.ExecutionException)

Example 9 with PartitionWithStatistics

use of com.facebook.presto.hive.metastore.PartitionWithStatistics in project presto by prestodb.

the class TestHiveSplitManager method assertRedundantColumnDomains.

private void assertRedundantColumnDomains(Range predicateRange, PartitionStatistics partitionStatistics, List<Set<ColumnHandle>> expectedRedundantColumnDomains, HiveColumnHandle columnHandle) throws Exception {
    // Prepare query predicate tuple domain
    TupleDomain<ColumnHandle> queryTupleDomain = TupleDomain.fromColumnDomains(Optional.of(ImmutableList.of(new ColumnDomain<>(columnHandle, Domain.create(SortedRangeSet.copyOf(predicateRange.getType(), ImmutableList.of(predicateRange)), false)))));
    // Prepare partition with stats
    PartitionWithStatistics partitionWithStatistics = new PartitionWithStatistics(new Partition("test_db", "test_table", ImmutableList.of(PARTITION_VALUE), new Storage(fromHiveStorageFormat(ORC), "location", Optional.empty(), true, ImmutableMap.of(), ImmutableMap.of()), COLUMNS, ImmutableMap.of(), Optional.empty(), false, true, 0), PARTITION_NAME, partitionStatistics);
    HiveClientConfig hiveClientConfig = new HiveClientConfig().setPartitionStatisticsBasedOptimizationEnabled(true);
    HdfsEnvironment hdfsEnvironment = new HdfsEnvironment(new HiveHdfsConfiguration(new HdfsConfigurationInitializer(hiveClientConfig, new MetastoreClientConfig()), ImmutableSet.of()), new MetastoreClientConfig(), new NoHdfsAuthentication());
    HiveMetadataFactory metadataFactory = new HiveMetadataFactory(new TestingExtendedHiveMetastore(TEST_TABLE, partitionWithStatistics), hdfsEnvironment, new HivePartitionManager(FUNCTION_AND_TYPE_MANAGER, hiveClientConfig), DateTimeZone.forOffsetHours(1), true, false, false, false, true, true, hiveClientConfig.getMaxPartitionBatchSize(), hiveClientConfig.getMaxPartitionsPerScan(), false, FUNCTION_AND_TYPE_MANAGER, new HiveLocationService(hdfsEnvironment), FUNCTION_RESOLUTION, ROW_EXPRESSION_SERVICE, FILTER_STATS_CALCULATOR_SERVICE, new TableParameterCodec(), HiveTestUtils.PARTITION_UPDATE_CODEC, HiveTestUtils.PARTITION_UPDATE_SMILE_CODEC, executor, new HiveTypeTranslator(), new HiveStagingFileCommitter(hdfsEnvironment, executor), new HiveZeroRowFileCreator(hdfsEnvironment, new OutputStreamDataSinkFactory(), executor), TEST_SERVER_VERSION, new HivePartitionObjectBuilder(), new HiveEncryptionInformationProvider(ImmutableList.of()), new HivePartitionStats(), new HiveFileRenamer(), HiveColumnConverterProvider.DEFAULT_COLUMN_CONVERTER_PROVIDER);
    HiveSplitManager splitManager = new HiveSplitManager(new TestingHiveTransactionManager(metadataFactory), new NamenodeStats(), hdfsEnvironment, new TestingDirectoryLister(), directExecutor(), new HiveCoercionPolicy(FUNCTION_AND_TYPE_MANAGER), new CounterStat(), 100, hiveClientConfig.getMaxOutstandingSplitsSize(), hiveClientConfig.getMinPartitionBatchSize(), hiveClientConfig.getMaxPartitionBatchSize(), hiveClientConfig.getSplitLoaderConcurrency(), false, new ConfigBasedCacheQuotaRequirementProvider(new CacheConfig()), new HiveEncryptionInformationProvider(ImmutableList.of()));
    HiveColumnHandle partitionColumn = new HiveColumnHandle("ds", HIVE_STRING, parseTypeSignature(VARCHAR), MAX_PARTITION_KEY_COLUMN_INDEX, PARTITION_KEY, Optional.empty(), Optional.empty());
    List<HivePartition> partitions = ImmutableList.of(new HivePartition(new SchemaTableName("test_schema", "test_table"), PARTITION_NAME, ImmutableMap.of(partitionColumn, NullableValue.of(createUnboundedVarcharType(), utf8Slice(PARTITION_VALUE)))));
    TupleDomain<Subfield> domainPredicate = queryTupleDomain.transform(HiveColumnHandle.class::cast).transform(column -> new Subfield(column.getName(), ImmutableList.of()));
    ConnectorSplitSource splitSource = splitManager.getSplits(new HiveTransactionHandle(), new TestingConnectorSession(new HiveSessionProperties(hiveClientConfig, new OrcFileWriterConfig(), new ParquetFileWriterConfig(), new CacheConfig()).getSessionProperties()), new HiveTableLayoutHandle(new SchemaTableName("test_schema", "test_table"), "test_path", ImmutableList.of(partitionColumn), COLUMNS, ImmutableMap.of(), partitions, domainPredicate, TRUE_CONSTANT, ImmutableMap.of(partitionColumn.getName(), partitionColumn, columnHandle.getName(), columnHandle), queryTupleDomain, Optional.empty(), Optional.empty(), false, "layout", Optional.empty(), false), SPLIT_SCHEDULING_CONTEXT);
    List<Set<ColumnHandle>> actualRedundantColumnDomains = splitSource.getNextBatch(NOT_PARTITIONED, 100).get().getSplits().stream().map(HiveSplit.class::cast).map(HiveSplit::getRedundantColumnDomains).collect(toImmutableList());
    assertEquals(actualRedundantColumnDomains, expectedRedundantColumnDomains);
}
Also used : CounterStat(com.facebook.airlift.stats.CounterStat) Subfield(com.facebook.presto.common.Subfield) ColumnHandle(com.facebook.presto.spi.ColumnHandle) TestingConnectorSession(com.facebook.presto.testing.TestingConnectorSession) Storage(com.facebook.presto.hive.metastore.Storage) OutputStreamDataSinkFactory(com.facebook.presto.hive.datasink.OutputStreamDataSinkFactory) Set(java.util.Set) SortedRangeSet(com.facebook.presto.common.predicate.SortedRangeSet) ImmutableSet(com.google.common.collect.ImmutableSet) NoHdfsAuthentication(com.facebook.presto.hive.authentication.NoHdfsAuthentication) CacheConfig(com.facebook.presto.cache.CacheConfig) Partition(com.facebook.presto.hive.metastore.Partition) ConnectorSplitSource(com.facebook.presto.spi.ConnectorSplitSource) SchemaTableName(com.facebook.presto.spi.SchemaTableName) PartitionWithStatistics(com.facebook.presto.hive.metastore.PartitionWithStatistics)

Example 10 with PartitionWithStatistics

use of com.facebook.presto.hive.metastore.PartitionWithStatistics in project presto by prestodb.

the class TestHiveLogicalPlanner method testMetadataAggregationFoldingWithEmptyPartitions.

@Test
public void testMetadataAggregationFoldingWithEmptyPartitions() {
    QueryRunner queryRunner = getQueryRunner();
    Session optimizeMetadataQueries = Session.builder(this.getQueryRunner().getDefaultSession()).setSystemProperty(OPTIMIZE_METADATA_QUERIES, Boolean.toString(true)).build();
    Session optimizeMetadataQueriesIgnoreStats = Session.builder(this.getQueryRunner().getDefaultSession()).setSystemProperty(OPTIMIZE_METADATA_QUERIES_IGNORE_STATS, Boolean.toString(true)).build();
    Session shufflePartitionColumns = Session.builder(this.getQueryRunner().getDefaultSession()).setCatalogSessionProperty(HIVE_CATALOG, SHUFFLE_PARTITIONED_COLUMNS_FOR_TABLE_WRITE, Boolean.toString(true)).build();
    queryRunner.execute(shufflePartitionColumns, "CREATE TABLE test_metadata_aggregation_folding_with_empty_partitions WITH (partitioned_by = ARRAY['ds']) AS " + "SELECT orderkey, CAST(to_iso8601(date_add('DAY', orderkey % 2, date('2020-07-01'))) AS VARCHAR) AS ds FROM orders WHERE orderkey < 1000");
    ExtendedHiveMetastore metastore = replicateHiveMetastore((DistributedQueryRunner) queryRunner);
    MetastoreContext metastoreContext = new MetastoreContext(getSession().getUser(), getSession().getQueryId().getId(), Optional.empty(), Optional.empty(), Optional.empty(), false, HiveColumnConverterProvider.DEFAULT_COLUMN_CONVERTER_PROVIDER);
    Table table = metastore.getTable(metastoreContext, getSession().getSchema().get(), "test_metadata_aggregation_folding_with_empty_partitions").get();
    // Add one partition with no statistics.
    String partitionNameNoStats = "ds=2020-07-20";
    Partition partitionNoStats = createDummyPartition(table, partitionNameNoStats);
    metastore.addPartitions(metastoreContext, table.getDatabaseName(), table.getTableName(), ImmutableList.of(new PartitionWithStatistics(partitionNoStats, partitionNameNoStats, PartitionStatistics.empty())));
    // Add one partition with statistics indicating that it has no rows.
    String emptyPartitionName = "ds=2020-06-30";
    Partition emptyPartition = createDummyPartition(table, emptyPartitionName);
    metastore.addPartitions(metastoreContext, table.getDatabaseName(), table.getTableName(), ImmutableList.of(new PartitionWithStatistics(emptyPartition, emptyPartitionName, PartitionStatistics.builder().setBasicStatistics(new HiveBasicStatistics(1, 0, 0, 0)).build())));
    try {
        // Max ds doesn't have stats. Disable rewrite.
        assertPlan(optimizeMetadataQueries, "SELECT * FROM test_metadata_aggregation_folding_with_empty_partitions WHERE ds = (SELECT max(ds) from test_metadata_aggregation_folding_with_empty_partitions)", anyTree(anyTree(PlanMatchPattern.tableScan("test_metadata_aggregation_folding_with_empty_partitions")), anyTree(PlanMatchPattern.tableScan("test_metadata_aggregation_folding_with_empty_partitions"))));
        // Ignore metastore stats. Enable rewrite.
        assertPlan(optimizeMetadataQueriesIgnoreStats, "SELECT * FROM test_metadata_aggregation_folding_with_empty_partitions WHERE ds = (SELECT max(ds) from test_metadata_aggregation_folding_with_empty_partitions)", anyTree(join(INNER, ImmutableList.of(), tableScan("test_metadata_aggregation_folding_with_empty_partitions", getSingleValueColumnDomain("ds", "2020-07-20"), TRUE_CONSTANT, ImmutableSet.of("ds")), anyTree(any()))));
        // Max ds matching the filter has stats. Enable rewrite.
        assertPlan(optimizeMetadataQueries, "SELECT * FROM test_metadata_aggregation_folding_with_empty_partitions WHERE ds = (SELECT max(ds) from test_metadata_aggregation_folding_with_empty_partitions WHERE ds <= '2020-07-02')", anyTree(join(INNER, ImmutableList.of(), tableScan("test_metadata_aggregation_folding_with_empty_partitions", getSingleValueColumnDomain("ds", "2020-07-02"), TRUE_CONSTANT, ImmutableSet.of("ds")), anyTree(any()))));
        // Min ds partition stats indicates that it is an empty partition. Disable rewrite.
        assertPlan(optimizeMetadataQueries, "SELECT * FROM test_metadata_aggregation_folding_with_empty_partitions WHERE ds = (SELECT min(ds) from test_metadata_aggregation_folding_with_empty_partitions)", anyTree(anyTree(PlanMatchPattern.tableScan("test_metadata_aggregation_folding_with_empty_partitions")), anyTree(PlanMatchPattern.tableScan("test_metadata_aggregation_folding_with_empty_partitions"))));
        // Min ds partition matching the filter has non-empty stats. Enable rewrite.
        assertPlan(optimizeMetadataQueries, "SELECT * FROM test_metadata_aggregation_folding_with_empty_partitions WHERE ds = (SELECT min(ds) from test_metadata_aggregation_folding_with_empty_partitions WHERE ds >= '2020-07-01')", anyTree(join(INNER, ImmutableList.of(), tableScan("test_metadata_aggregation_folding_with_empty_partitions", getSingleValueColumnDomain("ds", "2020-07-01"), TRUE_CONSTANT, ImmutableSet.of("ds")), anyTree(any()))));
        // Test the non-reducible code path.
        // Disable rewrite as there are partitions with empty stats.
        assertPlan(optimizeMetadataQueries, "SELECT DISTINCT ds FROM test_metadata_aggregation_folding_with_empty_partitions", anyTree(tableScanWithConstraint("test_metadata_aggregation_folding_with_empty_partitions", ImmutableMap.of("ds", multipleValues(VARCHAR, utf8Slices("2020-06-30", "2020-07-01", "2020-07-02", "2020-07-20"))))));
        // Enable rewrite as all matching partitions have stats.
        assertPlan(optimizeMetadataQueries, "SELECT DISTINCT ds FROM test_metadata_aggregation_folding_with_empty_partitions WHERE ds BETWEEN '2020-07-01' AND '2020-07-03'", anyTree(values(ImmutableList.of("ds"), ImmutableList.of(ImmutableList.of(new StringLiteral("2020-07-01")), ImmutableList.of(new StringLiteral("2020-07-02"))))));
        // One of two resulting partitions doesn't have stats. Disable rewrite.
        assertPlan(optimizeMetadataQueries, "SELECT MIN(ds), MAX(ds) FROM test_metadata_aggregation_folding_with_empty_partitions WHERE ds BETWEEN '2020-06-30' AND '2020-07-03'", anyTree(tableScanWithConstraint("test_metadata_aggregation_folding_with_empty_partitions", ImmutableMap.of("ds", multipleValues(VARCHAR, utf8Slices("2020-06-30", "2020-07-01", "2020-07-02"))))));
        // Ignore metadata stats. Always enable rewrite.
        assertPlan(optimizeMetadataQueriesIgnoreStats, "SELECT MIN(ds), MAX(ds) FROM test_metadata_aggregation_folding_with_empty_partitions WHERE ds BETWEEN '2020-06-30' AND '2020-07-03'", anyTree(project(ImmutableMap.of("min", expression("'2020-06-30'"), "max", expression("'2020-07-02'")), anyTree(values()))));
        // Both resulting partitions have stats. Enable rewrite.
        assertPlan(optimizeMetadataQueries, "SELECT MIN(ds), MAX(ds) FROM test_metadata_aggregation_folding_with_empty_partitions WHERE ds BETWEEN '2020-07-01' AND '2020-07-03'", anyTree(project(ImmutableMap.of("min", expression("'2020-07-01'"), "max", expression("'2020-07-02'")), anyTree(values()))));
    } finally {
        queryRunner.execute("DROP TABLE IF EXISTS test_metadata_aggregation_folding_with_empty_partitions");
    }
}
Also used : Partition(com.facebook.presto.hive.metastore.Partition) Table(com.facebook.presto.hive.metastore.Table) PartitionWithStatistics(com.facebook.presto.hive.metastore.PartitionWithStatistics) StringLiteral(com.facebook.presto.sql.tree.StringLiteral) MetastoreContext(com.facebook.presto.hive.metastore.MetastoreContext) ExtendedHiveMetastore(com.facebook.presto.hive.metastore.ExtendedHiveMetastore) QueryRunner(com.facebook.presto.testing.QueryRunner) DistributedQueryRunner(com.facebook.presto.tests.DistributedQueryRunner) Session(com.facebook.presto.Session) Test(org.testng.annotations.Test)

Aggregations

PartitionWithStatistics (com.facebook.presto.hive.metastore.PartitionWithStatistics)11 Partition (com.facebook.presto.hive.metastore.Partition)8 Table (com.facebook.presto.hive.metastore.Table)7 ExtendedHiveMetastore (com.facebook.presto.hive.metastore.ExtendedHiveMetastore)6 MetastoreContext (com.facebook.presto.hive.metastore.MetastoreContext)6 Test (org.testng.annotations.Test)6 PrestoException (com.facebook.presto.spi.PrestoException)5 SchemaTableName (com.facebook.presto.spi.SchemaTableName)5 ImmutableSet (com.google.common.collect.ImmutableSet)5 NoHdfsAuthentication (com.facebook.presto.hive.authentication.NoHdfsAuthentication)4 PartitionStatistics (com.facebook.presto.hive.metastore.PartitionStatistics)4 TableNotFoundException (com.facebook.presto.spi.TableNotFoundException)4 ImmutableList (com.google.common.collect.ImmutableList)4 ImmutableList.toImmutableList (com.google.common.collect.ImmutableList.toImmutableList)4 ImmutableMap (com.google.common.collect.ImmutableMap)4 List (java.util.List)4 Map (java.util.Map)4 Optional (java.util.Optional)4 FileUtils.makePartName (org.apache.hadoop.hive.common.FileUtils.makePartName)4 Domain (com.facebook.presto.common.predicate.Domain)3