
Example 1 with Partition

Use of io.prestosql.plugin.hive.metastore.Partition in project hetu-core by openlookeng.

From class GlueToPrestoConverter, the convertPartition method.

public static Partition convertPartition(com.amazonaws.services.glue.model.Partition gluePartition) {
    requireNonNull(gluePartition.getStorageDescriptor(), "Partition StorageDescriptor is null");
    StorageDescriptor sd = gluePartition.getStorageDescriptor();
    Partition.Builder partitionBuilder = Partition.builder()
            .setDatabaseName(gluePartition.getDatabaseName())
            .setTableName(gluePartition.getTableName())
            .setValues(gluePartition.getValues())
            .setColumns(sd.getColumns().stream()
                    .map(GlueToPrestoConverter::convertColumn)
                    .collect(toList()))
            .setParameters(firstNonNull(gluePartition.getParameters(), ImmutableMap.of()));
    setStorageBuilder(sd, partitionBuilder.getStorageBuilder());
    return partitionBuilder.build();
}
Also used : Partition(io.prestosql.plugin.hive.metastore.Partition) StorageDescriptor(com.amazonaws.services.glue.model.StorageDescriptor)
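
A minimal calling sketch, assuming a Glue partition assembled by hand with the AWS SDK v1 fluent setters; the database, table, location, formats, and serde values are hypothetical, and a fully populated StorageDescriptor is assumed to be what the downstream setStorageBuilder call expects:

// Hypothetical Glue model objects, for illustration only.
com.amazonaws.services.glue.model.StorageDescriptor sd = new com.amazonaws.services.glue.model.StorageDescriptor()
        .withColumns(new com.amazonaws.services.glue.model.Column().withName("id").withType("bigint"))
        .withLocation("s3://example-bucket/sales/orders/ds=2021-01-01")
        .withInputFormat("org.apache.hadoop.mapred.TextInputFormat")
        .withOutputFormat("org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat")
        .withSerdeInfo(new com.amazonaws.services.glue.model.SerDeInfo()
                .withSerializationLibrary("org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe"));
com.amazonaws.services.glue.model.Partition gluePartition = new com.amazonaws.services.glue.model.Partition()
        .withDatabaseName("sales")
        .withTableName("orders")
        .withValues("2021-01-01")
        .withStorageDescriptor(sd);
// convertPartition fails fast (requireNonNull) if the StorageDescriptor is null.
Partition partition = GlueToPrestoConverter.convertPartition(gluePartition);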

Example 2 with Partition

Use of io.prestosql.plugin.hive.metastore.Partition in project hetu-core by openlookeng.

From class BridgingHiveMetastore, the getPartitionsByNames method.

@Override
public Map<String, Optional<Partition>> getPartitionsByNames(HiveIdentity identity, String databaseName, String tableName, List<String> partitionNames) {
    requireNonNull(partitionNames, "partitionNames is null");
    if (partitionNames.isEmpty()) {
        return ImmutableMap.of();
    }
    Function<org.apache.hadoop.hive.metastore.api.Partition, Partition> fromMetastoreApiPartition = ThriftMetastoreUtil::fromMetastoreApiPartition;
    boolean isAvroTableWithSchemaSet = delegate.getTable(identity, databaseName, tableName)
            .map(ThriftMetastoreUtil::isAvroTableWithSchemaSet)
            .orElse(false);
    if (isAvroTableWithSchemaSet) {
        // Avro tables with an explicit schema use the table-level schema for every partition
        List<FieldSchema> schema = delegate.getFields(identity, databaseName, tableName).get();
        fromMetastoreApiPartition = partition -> ThriftMetastoreUtil.fromMetastoreApiPartition(partition, schema);
    }
    Map<String, List<String>> partitionNameToPartitionValuesMap = partitionNames.stream()
            .collect(Collectors.toMap(identity(), HiveUtil::toPartitionValues));
    Map<List<String>, Partition> partitionValuesToPartitionMap = delegate.getPartitionsByNames(identity, databaseName, tableName, partitionNames).stream()
            .map(fromMetastoreApiPartition)
            .collect(Collectors.toMap(Partition::getValues, identity()));
    ImmutableMap.Builder<String, Optional<Partition>> resultBuilder = ImmutableMap.builder();
    for (Map.Entry<String, List<String>> entry : partitionNameToPartitionValuesMap.entrySet()) {
        Partition partition = partitionValuesToPartitionMap.get(entry.getValue());
        resultBuilder.put(entry.getKey(), Optional.ofNullable(partition));
    }
    return resultBuilder.build();
}
Also used : Partition(io.prestosql.plugin.hive.metastore.Partition) HivePartition(io.prestosql.plugin.hive.HivePartition) Optional(java.util.Optional) FieldSchema(org.apache.hadoop.hive.metastore.api.FieldSchema) ImmutableMap(com.google.common.collect.ImmutableMap) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) List(java.util.List) Map(java.util.Map)
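
A hedged calling sketch; metastore, the session factory, and the schema, table, and partition names below are all hypothetical. Per the result loop above, a name with no matching partition maps to Optional.empty() rather than being dropped from the result:

HiveIdentity identity = new HiveIdentity(newSession());   // hypothetical session factory
Map<String, Optional<Partition>> partitions = metastore.getPartitionsByNames(
        identity, "sales", "orders", ImmutableList.of("ds=2021-01-01", "ds=2021-01-02"));
partitions.forEach((name, partition) ->
        System.out.println(name + " -> " + (partition.isPresent() ? "found" : "missing")));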

Example 3 with Partition

Use of io.prestosql.plugin.hive.metastore.Partition in project hetu-core by openlookeng.

From class ThriftMetastoreUtil, the fromMetastoreApiPartition method.

public static Partition fromMetastoreApiPartition(org.apache.hadoop.hive.metastore.api.Partition partition, List<FieldSchema> schema) {
    StorageDescriptor storageDescriptor = partition.getSd();
    if (storageDescriptor == null) {
        throw new PrestoException(HiveErrorCode.HIVE_INVALID_METADATA, "Partition does not contain a storage descriptor: " + partition);
    }
    Partition.Builder partitionBuilder = Partition.builder()
            .setDatabaseName(partition.getDbName())
            .setTableName(partition.getTableName())
            .setValues(partition.getValues())
            .setColumns(schema.stream()
                    .map(ThriftMetastoreUtil::fromMetastoreApiFieldSchema)
                    .collect(toList()))
            .setParameters(partition.getParameters());
    // TODO is bucketing_version set on partition level??
    fromMetastoreApiStorageDescriptor(
            partition.getParameters(),
            storageDescriptor,
            partitionBuilder.getStorageBuilder(),
            format("%s.%s", partition.getTableName(), partition.getValues()));
    return partitionBuilder.build();
}
Also used : Partition(io.prestosql.plugin.hive.metastore.Partition) StorageDescriptor(org.apache.hadoop.hive.metastore.api.StorageDescriptor) PrestoException(io.prestosql.spi.PrestoException)
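
A minimal sketch of the schema-override form, assuming a Thrift partition built by hand; all names, the location, and the formats are hypothetical:

// Thrift StorageDescriptor: the converter throws HIVE_INVALID_METADATA without one.
org.apache.hadoop.hive.metastore.api.StorageDescriptor sd = new org.apache.hadoop.hive.metastore.api.StorageDescriptor();
sd.setCols(ImmutableList.of(new FieldSchema("id", "bigint", null)));
sd.setLocation("hdfs://namenode/warehouse/orders/ds=2021-01-01");
sd.setInputFormat("org.apache.hadoop.mapred.TextInputFormat");
sd.setOutputFormat("org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat");
sd.setSerdeInfo(new org.apache.hadoop.hive.metastore.api.SerDeInfo(
        "orders", "org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe", ImmutableMap.of()));

org.apache.hadoop.hive.metastore.api.Partition thriftPartition = new org.apache.hadoop.hive.metastore.api.Partition();
thriftPartition.setDbName("sales");
thriftPartition.setTableName("orders");
thriftPartition.setValues(ImmutableList.of("2021-01-01"));
thriftPartition.setParameters(ImmutableMap.of());
thriftPartition.setSd(sd);

// The explicit schema replaces the columns recorded in the storage descriptor,
// which is how the Avro-with-schema path in Example 2 supplies table-level columns.
Partition partition = ThriftMetastoreUtil.fromMetastoreApiPartition(
        thriftPartition, ImmutableList.of(new FieldSchema("id", "bigint", null)));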

Example 4 with Partition

Use of io.prestosql.plugin.hive.metastore.Partition in project hetu-core by openlookeng.

From class AbstractTestHive, the doInsertIntoNewPartition method.

private void doInsertIntoNewPartition(HiveStorageFormat storageFormat, SchemaTableName tableName) throws Exception {
    // creating the table
    doCreateEmptyTable(tableName, storageFormat, CREATE_TABLE_COLUMNS_PARTITIONED);
    // insert the data
    String queryId = insertData(tableName, CREATE_TABLE_PARTITIONED_DATA);
    Set<String> existingFiles;
    try (Transaction transaction = newTransaction()) {
        // verify partitions were created
        HiveIdentity identity = new HiveIdentity(newSession());
        List<String> partitionNames = transaction.getMetastore(tableName.getSchemaName())
                .getPartitionNames(identity, tableName.getSchemaName(), tableName.getTableName())
                .orElseThrow(() -> new AssertionError("Table does not exist: " + tableName));
        assertEqualsIgnoreOrder(partitionNames, CREATE_TABLE_PARTITIONED_DATA.getMaterializedRows().stream()
                .map(row -> "ds=" + row.getField(CREATE_TABLE_PARTITIONED_DATA.getTypes().size() - 1))
                .collect(toList()));
        // verify the node versions in partitions
        Map<String, Optional<Partition>> partitions = getMetastoreClient().getPartitionsByNames(identity, tableName.getSchemaName(), tableName.getTableName(), partitionNames);
        assertEquals(partitions.size(), partitionNames.size());
        for (String partitionName : partitionNames) {
            Partition partition = partitions.get(partitionName).get();
            assertEquals(partition.getParameters().get(PRESTO_VERSION_NAME), TEST_SERVER_VERSION);
            assertEquals(partition.getParameters().get(PRESTO_QUERY_ID_NAME), queryId);
        }
        // load the new table
        ConnectorSession session = newSession();
        ConnectorMetadata metadata = transaction.getMetadata();
        metadata.beginQuery(session);
        ConnectorTableHandle tableHandle = getTableHandle(metadata, tableName);
        List<ColumnHandle> columnHandles = filterNonHiddenColumnHandles(metadata.getColumnHandles(session, tableHandle).values());
        // verify the data
        MaterializedResult result = readTable(transaction, tableHandle, columnHandles, session, TupleDomain.all(), OptionalInt.empty(), Optional.of(storageFormat));
        assertEqualsIgnoreOrder(result.getMaterializedRows(), CREATE_TABLE_PARTITIONED_DATA.getMaterializedRows());
        // test rollback
        existingFiles = listAllDataFiles(transaction, tableName.getSchemaName(), tableName.getTableName());
        assertFalse(existingFiles.isEmpty());
        // test statistics
        for (String partitionName : partitionNames) {
            HiveBasicStatistics partitionStatistics = getBasicStatisticsForPartition(session, transaction, tableName, partitionName);
            assertEquals(partitionStatistics.getRowCount().getAsLong(), 1L);
            assertEquals(partitionStatistics.getFileCount().getAsLong(), 1L);
            assertGreaterThan(partitionStatistics.getInMemoryDataSizeInBytes().getAsLong(), 0L);
            assertGreaterThan(partitionStatistics.getOnDiskDataSizeInBytes().getAsLong(), 0L);
        }
    }
    Path stagingPathRoot;
    try (Transaction transaction = newTransaction()) {
        ConnectorSession session = newSession();
        ConnectorMetadata metadata = transaction.getMetadata();
        ConnectorTableHandle tableHandle = getTableHandle(metadata, tableName);
        metadata.beginQuery(session);
        // "stage" insert data
        ConnectorInsertTableHandle insertTableHandle = metadata.beginInsert(session, tableHandle);
        stagingPathRoot = getStagingPathRoot(insertTableHandle);
        ConnectorPageSink sink = pageSinkProvider.createPageSink(transaction.getTransactionHandle(), session, insertTableHandle);
        sink.appendPage(CREATE_TABLE_PARTITIONED_DATA_2ND.toPage());
        Collection<Slice> fragments = getFutureValue(sink.finish());
        metadata.finishInsert(session, insertTableHandle, fragments, ImmutableList.of());
        // verify all temp files start with the unique prefix
        HdfsContext context = new HdfsContext(session, tableName.getSchemaName(), tableName.getTableName());
        Set<String> tempFiles = listAllDataFiles(context, getStagingPathRoot(insertTableHandle));
        assertFalse(tempFiles.isEmpty());
        for (String filePath : tempFiles) {
            assertThat(new Path(filePath).getName()).startsWith(session.getQueryId());
        }
        // rollback insert
        transaction.rollback();
    }
    // verify the data is unchanged
    try (Transaction transaction = newTransaction()) {
        ConnectorSession session = newSession();
        ConnectorMetadata metadata = transaction.getMetadata();
        metadata.beginQuery(session);
        ConnectorTableHandle tableHandle = getTableHandle(metadata, tableName);
        List<ColumnHandle> columnHandles = filterNonHiddenColumnHandles(metadata.getColumnHandles(session, tableHandle).values());
        MaterializedResult result = readTable(transaction, tableHandle, columnHandles, session, TupleDomain.all(), OptionalInt.empty(), Optional.empty());
        assertEqualsIgnoreOrder(result.getMaterializedRows(), CREATE_TABLE_PARTITIONED_DATA.getMaterializedRows());
        // verify we did not modify the table directory
        assertEquals(listAllDataFiles(transaction, tableName.getSchemaName(), tableName.getTableName()), existingFiles);
        // verify temp directory is empty
        HdfsContext context = new HdfsContext(session, tableName.getSchemaName(), tableName.getTableName());
        assertTrue(listAllDataFiles(context, stagingPathRoot).isEmpty());
    }
}
Also used : Path(org.apache.hadoop.fs.Path) Partition(io.prestosql.plugin.hive.metastore.Partition) HiveColumnHandle.bucketColumnHandle(io.prestosql.plugin.hive.HiveColumnHandle.bucketColumnHandle) ColumnHandle(io.prestosql.spi.connector.ColumnHandle) Optional(java.util.Optional) ConnectorInsertTableHandle(io.prestosql.spi.connector.ConnectorInsertTableHandle) HiveIdentity(io.prestosql.plugin.hive.authentication.HiveIdentity) ConnectorTableHandle(io.prestosql.spi.connector.ConnectorTableHandle) Slices.utf8Slice(io.airlift.slice.Slices.utf8Slice) Slice(io.airlift.slice.Slice) ConnectorSession(io.prestosql.spi.connector.ConnectorSession) TestingConnectorSession(io.prestosql.testing.TestingConnectorSession) ConnectorMetadata(io.prestosql.spi.connector.ConnectorMetadata) HdfsContext(io.prestosql.plugin.hive.HdfsEnvironment.HdfsContext) MaterializedResult(io.prestosql.testing.MaterializedResult) ConnectorPageSink(io.prestosql.spi.connector.ConnectorPageSink)
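
The core of the rollback check above, condensed into a sketch; tableHandle and dataPage are hypothetical stand-ins for the handles and test data the full test prepares:

try (Transaction transaction = newTransaction()) {
    ConnectorSession session = newSession();
    ConnectorMetadata metadata = transaction.getMetadata();
    // Stage an insert: data lands under a staging path, not the table directory.
    ConnectorInsertTableHandle insertHandle = metadata.beginInsert(session, tableHandle);
    ConnectorPageSink sink = pageSinkProvider.createPageSink(
            transaction.getTransactionHandle(), session, insertHandle);
    sink.appendPage(dataPage);
    metadata.finishInsert(session, insertHandle, getFutureValue(sink.finish()), ImmutableList.of());
    // Rolling back discards the staged files; the table directory is untouched.
    transaction.rollback();
}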

Example 5 with Partition

Use of io.prestosql.plugin.hive.metastore.Partition in project hetu-core by openlookeng.

From class AbstractTestHive, the eraseStatistics method.

private void eraseStatistics(SchemaTableName schemaTableName) {
    HiveMetastore hiveMetastoreClient = getMetastoreClient();
    HiveIdentity identity = new HiveIdentity(SESSION);
    hiveMetastoreClient.updateTableStatistics(identity, schemaTableName.getSchemaName(), schemaTableName.getTableName(),
            statistics -> new PartitionStatistics(createEmptyStatistics(), ImmutableMap.of()));
    Table table = hiveMetastoreClient.getTable(identity, schemaTableName.getSchemaName(), schemaTableName.getTableName())
            .orElseThrow(() -> new TableNotFoundException(schemaTableName));
    List<String> partitionColumns = table.getPartitionColumns().stream()
            .map(Column::getName)
            .collect(toImmutableList());
    if (!table.getPartitionColumns().isEmpty()) {
        List<String> partitionNames = hiveMetastoreClient.getPartitionNames(identity, schemaTableName.getSchemaName(), schemaTableName.getTableName())
                .orElse(ImmutableList.of());
        List<Partition> partitions = hiveMetastoreClient.getPartitionsByNames(identity, schemaTableName.getSchemaName(), schemaTableName.getTableName(), partitionNames)
                .entrySet().stream()
                .map(Map.Entry::getValue)
                .filter(Optional::isPresent)
                .map(Optional::get)
                .collect(toImmutableList());
        for (Partition partition : partitions) {
            hiveMetastoreClient.updatePartitionStatistics(identity, schemaTableName.getSchemaName(), schemaTableName.getTableName(),
                    makePartName(partitionColumns, partition.getValues()),
                    statistics -> new PartitionStatistics(createEmptyStatistics(), ImmutableMap.of()));
        }
    }
        }
    }
}
Also used : TableNotFoundException(io.prestosql.spi.connector.TableNotFoundException) Partition(io.prestosql.plugin.hive.metastore.Partition) Table(io.prestosql.plugin.hive.metastore.Table) Optional(java.util.Optional) ThriftHiveMetastore(io.prestosql.plugin.hive.metastore.thrift.ThriftHiveMetastore) CachingHiveMetastore(io.prestosql.plugin.hive.metastore.CachingHiveMetastore) SemiTransactionalHiveMetastore(io.prestosql.plugin.hive.metastore.SemiTransactionalHiveMetastore) HiveMetastore(io.prestosql.plugin.hive.metastore.HiveMetastore) BridgingHiveMetastore(io.prestosql.plugin.hive.metastore.thrift.BridgingHiveMetastore) Map(java.util.Map) Collections.emptyMap(java.util.Collections.emptyMap) ImmutableMap.toImmutableMap(com.google.common.collect.ImmutableMap.toImmutableMap) ImmutableMap(com.google.common.collect.ImmutableMap) HashMap(java.util.HashMap) HiveIdentity(io.prestosql.plugin.hive.authentication.HiveIdentity)
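
A hedged sketch of the per-partition statistics reset, assuming hypothetical schema and table names; makePartName joins partition column names and values into the canonical Hive partition name such as ds=2021-01-01:

// Hypothetical names: a table in schema "sales" partitioned by "ds".
String partitionName = makePartName(ImmutableList.of("ds"), ImmutableList.of("2021-01-01"));
hiveMetastoreClient.updatePartitionStatistics(identity, "sales", "orders", partitionName,
        statistics -> new PartitionStatistics(createEmptyStatistics(), ImmutableMap.of()));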

Aggregations

Partition (io.prestosql.plugin.hive.metastore.Partition): 32 usages
PrestoException (io.prestosql.spi.PrestoException): 22 usages
Table (io.prestosql.plugin.hive.metastore.Table): 21 usages
Optional (java.util.Optional): 19 usages
ImmutableMap (com.google.common.collect.ImmutableMap): 17 usages
List (java.util.List): 17 usages
Map (java.util.Map): 17 usages
Path (org.apache.hadoop.fs.Path): 17 usages
ImmutableList (com.google.common.collect.ImmutableList): 16 usages
HiveIdentity (io.prestosql.plugin.hive.authentication.HiveIdentity): 15 usages
Column (io.prestosql.plugin.hive.metastore.Column): 15 usages
ConnectorSession (io.prestosql.spi.connector.ConnectorSession): 15 usages
SchemaTableName (io.prestosql.spi.connector.SchemaTableName): 15 usages
HdfsContext (io.prestosql.plugin.hive.HdfsEnvironment.HdfsContext): 14 usages
Objects.requireNonNull (java.util.Objects.requireNonNull): 14 usages
Set (java.util.Set): 14 usages
ImmutableMap.toImmutableMap (com.google.common.collect.ImmutableMap.toImmutableMap): 13 usages
ImmutableSet (com.google.common.collect.ImmutableSet): 13 usages
NOT_SUPPORTED (io.prestosql.spi.StandardErrorCode.NOT_SUPPORTED): 13 usages
String.format (java.lang.String.format): 13 usages