Search in sources:

Example 1 with Storage

Use of com.facebook.presto.hive.metastore.Storage in the project presto by prestodb.

From the class DeltaMetadata, the method getTableHandle.

/**
 * Resolves {@code schemaTableName} to a Delta table and wraps it in a handle.
 *
 * <p>When the schema equals {@code PATH_SCHEMA} (ignoring case) the table name itself is used as
 * the table location. Otherwise the table is looked up in the metastore and the location is read
 * from its storage descriptor; for tables whose {@code spark.sql.sources.provider} parameter is
 * {@code delta}, the SerDe parameter {@code path} is preferred when it is non-empty.
 *
 * @param session session passed to the schema listing, the metastore lookup and the Delta client
 * @param schemaTableName schema and table name to resolve
 * @return the table handle, or {@code null} when the schema is not listed, the metastore has no
 *         such table, or the Delta client returns no table for the resolved location
 */
@Override
public DeltaTableHandle getTableHandle(ConnectorSession session, SchemaTableName schemaTableName) {
    String schema = schemaTableName.getSchemaName();
    if (!listSchemaNames(session).contains(schema)) {
        // unknown schema, so the table cannot exist
        return null;
    }

    DeltaTableName parsedName = DeltaTableName.from(schemaTableName.getTableName());
    String nameOrPath = parsedName.getTableNameOrPath();

    String location;
    if (PATH_SCHEMA.equalsIgnoreCase(schema)) {
        location = nameOrPath;
    } else {
        Optional<Table> lookup = metastore.getTable(metastoreContext(session), schema, nameOrPath);
        if (!lookup.isPresent()) {
            // the metastore does not know this table
            return null;
        }
        Table metastoreEntry = lookup.get();
        Storage storage = metastoreEntry.getStorage();

        // default: the 'location' attribute of the storage descriptor
        location = storage.getLocation();

        boolean providerIsDelta = "delta".equalsIgnoreCase(metastoreEntry.getParameters().get("spark.sql.sources.provider"));
        if (providerIsDelta) {
            // for these tables the location is expected under the SerDe property "path"
            String serdePath = storage.getSerdeParameters().get("path");
            if (Strings.isNullOrEmpty(serdePath)) {
                // keep the 'location' attribute chosen above
                log.warn("Location key ('path') is missing in SerDe properties for table %s. " + "Using the 'location' attribute as the table location.", schemaTableName);
            } else {
                location = serdePath;
            }
        }
    }

    return deltaClient.getTable(session, schemaTableName, location, parsedName.getSnapshotId(), parsedName.getTimestampMillisUtc())
            .map(deltaTable -> new DeltaTableHandle(connectorId, deltaTable))
            .orElse(null);
}
Also used : Table(com.facebook.presto.hive.metastore.Table) Storage(com.facebook.presto.hive.metastore.Storage)

Example 2 with Storage

Use of com.facebook.presto.hive.metastore.Storage in the project presto by prestodb.

From the class ManifestPartitionLoader, the method createInternalHiveSplitFactory.

/**
 * Builds the {@link InternalHiveSplitFactory} used to create splits for one partition.
 *
 * <p>Storage and data-column count come from the metastore partition when
 * {@code partition.getPartition()} is present, and from {@code table} otherwise. The input format
 * and file system are obtained for the partition location through {@code hdfsEnvironment}.
 *
 * @param table table the partition belongs to; supplies fallbacks for storage and column count
 * @param partition partition metadata to build the factory for
 * @param session session from which the node selection strategy and max initial split size are read
 * @param pathDomain optional path domain handed through to the factory
 * @param hdfsEnvironment source of the Hadoop configuration and file system
 * @param hdfsContext context passed to {@code hdfsEnvironment}
 * @param schedulerUsesHostAddresses flag handed through to the factory
 * @return the configured split factory
 * @throws IOException declared by this method; see {@code hdfsEnvironment.getFileSystem}
 */
private InternalHiveSplitFactory createInternalHiveSplitFactory(Table table, HivePartitionMetadata partition, ConnectorSession session, Optional<Domain> pathDomain, HdfsEnvironment hdfsEnvironment, HdfsContext hdfsContext, boolean schedulerUsesHostAddresses) throws IOException {
    Optional<Partition> metastorePartition = partition.getPartition();
    String partitionId = partition.getHivePartition().getPartitionId();

    // Prefer partition-level metadata; fall back to the table when there is no metastore partition.
    Storage effectiveStorage = metastorePartition.map(Partition::getStorage).orElse(table.getStorage());
    int dataColumnCount = metastorePartition.isPresent()
            ? metastorePartition.get().getColumns().size()
            : table.getDataColumns().size();

    List<HivePartitionKey> keys = getPartitionKeys(table, metastorePartition, partitionId);
    Path location = new Path(getPartitionLocation(table, metastorePartition));

    Configuration hadoopConf = hdfsEnvironment.getConfiguration(hdfsContext, location);
    InputFormat<?, ?> format = getInputFormat(hadoopConf, effectiveStorage.getStorageFormat().getInputFormat(), false);
    ExtendedFileSystem fs = hdfsEnvironment.getFileSystem(hdfsContext, location);

    HiveSplitPartitionInfo partitionInfo = new HiveSplitPartitionInfo(
            effectiveStorage,
            location.toUri(),
            keys,
            partitionId,
            dataColumnCount,
            partition.getTableToPartitionMapping(),
            Optional.empty(),
            partition.getRedundantColumnDomains());

    return new InternalHiveSplitFactory(
            fs,
            format,
            pathDomain,
            getNodeSelectionStrategy(session),
            getMaxInitialSplitSize(session),
            false,
            partitionInfo,
            schedulerUsesHostAddresses,
            partition.getEncryptionInformation());
}
Also used : Table(com.facebook.presto.hive.metastore.Table) ListenableFuture(com.google.common.util.concurrent.ListenableFuture) BlockLocation(org.apache.hadoop.fs.BlockLocation) MetastoreUtil.getPartitionLocation(com.facebook.presto.hive.metastore.MetastoreUtil.getPartitionLocation) MALFORMED_HIVE_FILE_STATISTICS(com.facebook.presto.hive.HiveErrorCode.MALFORMED_HIVE_FILE_STATISTICS) PrestoException(com.facebook.presto.spi.PrestoException) FileStatus(org.apache.hadoop.fs.FileStatus) HiveSessionProperties.getNodeSelectionStrategy(com.facebook.presto.hive.HiveSessionProperties.getNodeSelectionStrategy) Partition(com.facebook.presto.hive.metastore.Partition) HiveUtil.getInputFormat(com.facebook.presto.hive.HiveUtil.getInputFormat) IGNORED(com.facebook.presto.hive.NestedDirectoryPolicy.IGNORED) ImmutableList(com.google.common.collect.ImmutableList) Verify.verify(com.google.common.base.Verify.verify) Configuration(org.apache.hadoop.conf.Configuration) Map(java.util.Map) Objects.requireNonNull(java.util.Objects.requireNonNull) InputFormat(org.apache.hadoop.mapred.InputFormat) Path(org.apache.hadoop.fs.Path) RECURSE(com.facebook.presto.hive.NestedDirectoryPolicy.RECURSE) MANIFEST_VERSION(com.facebook.presto.hive.HiveManifestUtils.MANIFEST_VERSION) HiveManifestUtils.decompressFileSizes(com.facebook.presto.hive.HiveManifestUtils.decompressFileSizes) Storage(com.facebook.presto.hive.metastore.Storage) Iterator(java.util.Iterator) FILE_SIZES(com.facebook.presto.hive.HiveManifestUtils.FILE_SIZES) HiveManifestUtils.decompressFileNames(com.facebook.presto.hive.HiveManifestUtils.decompressFileNames) ExtendedFileSystem(com.facebook.presto.hive.filesystem.ExtendedFileSystem) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) InternalHiveSplitFactory(com.facebook.presto.hive.util.InternalHiveSplitFactory) LocatedFileStatus(org.apache.hadoop.fs.LocatedFileStatus) 
HiveSessionProperties.getMaxSplitSize(com.facebook.presto.hive.HiveSessionProperties.getMaxSplitSize) IOException(java.io.IOException) FILE_NAMES(com.facebook.presto.hive.HiveManifestUtils.FILE_NAMES) Domain(com.facebook.presto.common.predicate.Domain) String.format(java.lang.String.format) HiveSessionProperties.getMaxInitialSplitSize(com.facebook.presto.hive.HiveSessionProperties.getMaxInitialSplitSize) ConnectorSession(com.facebook.presto.spi.ConnectorSession) UncheckedIOException(java.io.UncheckedIOException) List(java.util.List) VERSION_1(com.facebook.presto.hive.HiveManifestUtils.VERSION_1) Optional(java.util.Optional) HiveSessionProperties.isManifestVerificationEnabled(com.facebook.presto.hive.HiveSessionProperties.isManifestVerificationEnabled) Path(org.apache.hadoop.fs.Path) InternalHiveSplitFactory(com.facebook.presto.hive.util.InternalHiveSplitFactory) Storage(com.facebook.presto.hive.metastore.Storage) Configuration(org.apache.hadoop.conf.Configuration) ExtendedFileSystem(com.facebook.presto.hive.filesystem.ExtendedFileSystem)

Example 3 with Storage

Use of com.facebook.presto.hive.metastore.Storage in the project presto by prestodb.

From the class TestDynamicPruning, the method createTestingPageSource.

/**
 * Creates a page source over {@code outputFile} for tests.
 *
 * <p>Assembles a {@link HiveSplit} addressing the file through a {@code file:///} URI, a table
 * handle carrying a {@link HiveTableLayoutHandle}, and a {@code HivePageSourceProvider} built from
 * the default test factories, then asks the provider for a page source over all column handles.
 *
 * @param transaction transaction handle used for the table handle and the page source
 * @param config Hive client config; supplies the storage format and the session
 * @param splitContext split context passed to the provider
 * @param metastoreClientConfig metastore config used to build the provider's collaborators
 * @param outputFile data file the split refers to
 * @return the page source created by the provider
 */
private static ConnectorPageSource createTestingPageSource(HiveTransactionHandle transaction, HiveClientConfig config, SplitContext splitContext, MetastoreClientConfig metastoreClientConfig, File outputFile) {
    ImmutableList<HivePartitionKey> keys = ImmutableList.of(new HivePartitionKey(PARTITION_COLUMN.getName(), Optional.of("2020-09-09")));
    Map<Integer, Column> schemaDifference = ImmutableMap.of(1, new Column("ds", HIVE_STRING, Optional.empty(), Optional.empty()));

    // Storage descriptor derived from the configured Hive storage format.
    StorageFormat format = StorageFormat.create(
            config.getHiveStorageFormat().getSerDe(),
            config.getHiveStorageFormat().getInputFormat(),
            config.getHiveStorageFormat().getOutputFormat());
    HiveBucketProperty bucketing = new HiveBucketProperty(ImmutableList.of("l_orderkey"), 10, ImmutableList.of(), HIVE_COMPATIBLE, Optional.empty());
    Storage storage = new Storage(format, "location", Optional.of(bucketing), false, ImmutableMap.of(), ImmutableMap.of());

    HiveSplit hiveSplit = new HiveSplit(
            SCHEMA_NAME,
            TABLE_NAME,
            "",
            "file:///" + outputFile.getAbsolutePath(),
            0,
            outputFile.length(),
            outputFile.length(),
            Instant.now().toEpochMilli(),
            storage,
            keys,
            ImmutableList.of(),
            OptionalInt.of(1),
            OptionalInt.of(1),
            NO_PREFERENCE,
            getColumnHandles().size(),
            TableToPartitionMapping.mapColumnsByIndex(schemaDifference),
            Optional.empty(),
            false,
            Optional.empty(),
            NO_CACHE_REQUIREMENT,
            Optional.empty(),
            ImmutableMap.of(),
            ImmutableSet.of(),
            SplitWeight.standard());

    // One metastore Column per column handle, without comments or type metadata.
    ImmutableList<Column> layoutColumns = getColumnHandles().stream()
            .map(handle -> new Column(handle.getName(), handle.getHiveType(), Optional.empty(), Optional.empty()))
            .collect(toImmutableList());
    HiveTableLayoutHandle layout = new HiveTableLayoutHandle(
            new SchemaTableName(SCHEMA_NAME, TABLE_NAME),
            "path",
            ImmutableList.of(PARTITION_HIVE_COLUMN_HANDLE),
            layoutColumns,
            ImmutableMap.of(),
            TupleDomain.all(),
            TRUE_CONSTANT,
            ImmutableMap.of(),
            TupleDomain.all(),
            Optional.empty(),
            Optional.empty(),
            false,
            "layout",
            Optional.empty(),
            false);
    TableHandle table = new TableHandle(
            new ConnectorId(HIVE_CATALOG),
            new HiveTableHandle(SCHEMA_NAME, TABLE_NAME),
            transaction,
            Optional.of(layout));

    HivePageSourceProvider pageSourceProvider = new HivePageSourceProvider(
            config,
            createTestHdfsEnvironment(config, metastoreClientConfig),
            getDefaultHiveRecordCursorProvider(config, metastoreClientConfig),
            getDefaultHiveBatchPageSourceFactories(config, metastoreClientConfig),
            getDefaultHiveSelectivePageSourceFactories(config, metastoreClientConfig),
            FUNCTION_AND_TYPE_MANAGER,
            ROW_EXPRESSION_SERVICE);

    return pageSourceProvider.createPageSource(
            transaction,
            getSession(config),
            hiveSplit,
            table.getLayout().get(),
            ImmutableList.copyOf(getColumnHandles()),
            splitContext);
}
Also used : SchemaTableName(com.facebook.presto.spi.SchemaTableName) Storage(com.facebook.presto.hive.metastore.Storage) Column(com.facebook.presto.hive.metastore.Column) TableHandle(com.facebook.presto.spi.TableHandle) ConnectorId(com.facebook.presto.spi.ConnectorId)

Example 4 with Storage

Use of com.facebook.presto.hive.metastore.Storage in the project presto by prestodb.

From the class TestHiveMetadata, the method testColumnMetadataGetter.

/**
 * Checks the function returned by {@code HiveMetadata.columnMetadataGetter} for two handles
 * against the same table: a REGULAR column {@code c1}, and a SYNTHESIZED handle named
 * {@code HiveColumnHandle.PATH_COLUMN_NAME} that is expected to be reported as hidden.
 */
@Test
public void testColumnMetadataGetter() {
    // Shared fixture: a table with one data column, one partition column and a bucketed ORC storage.
    TypeManager typeManager = new TestingTypeManager();
    Column dataColumn = new Column("c1", HIVE_INT, Optional.empty(), Optional.of("some-metadata"));
    Column dsColumn = new Column("ds", HIVE_STRING, Optional.empty(), Optional.empty());
    HiveBucketProperty bucketing = new HiveBucketProperty(ImmutableList.of(dataColumn.getName()), 100, ImmutableList.of(), HIVE_COMPATIBLE, Optional.empty());
    Storage storage = new Storage(fromHiveStorageFormat(ORC), "location", Optional.of(bucketing), false, ImmutableMap.of(), ImmutableMap.of());
    Table table = new Table(
            "schema",
            "table",
            "user",
            PrestoTableType.MANAGED_TABLE,
            storage,
            ImmutableList.of(dataColumn),
            ImmutableList.of(dsColumn),
            ImmutableMap.of(),
            Optional.empty(),
            Optional.empty());

    // Regular column: expect plain name/type metadata.
    HiveColumnHandle regularHandle = new HiveColumnHandle(dataColumn.getName(), HiveType.HIVE_INT, TypeSignature.parseTypeSignature("int"), 0, HiveColumnHandle.ColumnType.REGULAR, Optional.empty(), Optional.empty());
    ColumnMetadata regularActual = HiveMetadata.columnMetadataGetter(table, typeManager, new HiveColumnConverter()).apply(regularHandle);
    ColumnMetadata regularExpected = new ColumnMetadata("c1", IntegerType.INTEGER);
    assertEquals(regularActual, regularExpected);

    // Synthesized path column: expect the metadata to be flagged hidden.
    HiveColumnHandle hiddenHandle = new HiveColumnHandle(HiveColumnHandle.PATH_COLUMN_NAME, HiveType.HIVE_INT, TypeSignature.parseTypeSignature("int"), 0, HiveColumnHandle.ColumnType.SYNTHESIZED, Optional.empty(), Optional.empty());
    ColumnMetadata hiddenActual = HiveMetadata.columnMetadataGetter(table, typeManager, new TestColumnConverter()).apply(hiddenHandle);
    ColumnMetadata hiddenExpected = ColumnMetadata.builder()
            .setName(HiveColumnHandle.PATH_COLUMN_NAME)
            .setType(IntegerType.INTEGER)
            .setHidden(true)
            .build();
    assertEquals(hiddenActual, hiddenExpected);
}
Also used : ColumnMetadata(com.facebook.presto.spi.ColumnMetadata) Table(com.facebook.presto.hive.metastore.Table) Storage(com.facebook.presto.hive.metastore.Storage) HiveMetadata.decodePreferredOrderingColumnsFromStorage(com.facebook.presto.hive.HiveMetadata.decodePreferredOrderingColumnsFromStorage) Column(com.facebook.presto.hive.metastore.Column) SortingColumn(com.facebook.presto.hive.metastore.SortingColumn) TestingTypeManager(com.facebook.presto.common.type.TestingTypeManager) TypeManager(com.facebook.presto.common.type.TypeManager) HiveColumnConverter(com.facebook.presto.hive.metastore.HiveColumnConverter) TestingTypeManager(com.facebook.presto.common.type.TestingTypeManager) Test(org.testng.annotations.Test)

Example 5 with Storage

Use of com.facebook.presto.hive.metastore.Storage in the project presto by prestodb.

From the class TestHiveUtil, the method testShouldUseRecordReaderFromInputFormat.

/**
 * Checks {@code shouldUseRecordReaderFromInputFormat} for two Hudi storage formats: the plain
 * {@code HoodieParquetInputFormat} with no custom split info is expected to yield {@code false},
 * and {@code HoodieParquetRealtimeInputFormat} with realtime split info is expected to yield
 * {@code true}.
 */
@Test
public void testShouldUseRecordReaderFromInputFormat() {
    String parquetSerde = "parquet.hive.serde.ParquetHiveSerDe";

    // Plain Hudi parquet input format, empty custom split info.
    StorageFormat plainFormat = StorageFormat.create(parquetSerde, "org.apache.hudi.hadoop.HoodieParquetInputFormat", "");
    Storage plainStorage = new Storage(plainFormat, "test", Optional.empty(), true, ImmutableMap.of(), ImmutableMap.of());
    assertFalse(shouldUseRecordReaderFromInputFormat(new Configuration(), plainStorage, ImmutableMap.of()));

    // Hudi realtime input format together with realtime file split info.
    StorageFormat realtimeFormat = StorageFormat.create(parquetSerde, "org.apache.hudi.hadoop.realtime.HoodieParquetRealtimeInputFormat", "");
    Storage realtimeStorage = new Storage(realtimeFormat, "test", Optional.empty(), true, ImmutableMap.of(), ImmutableMap.of());
    Map<String, String> realtimeSplitInfo = ImmutableMap.of(
            CUSTOM_FILE_SPLIT_CLASS_KEY, HoodieRealtimeFileSplit.class.getName(),
            HUDI_BASEPATH_KEY, "/test/file.parquet",
            HUDI_DELTA_FILEPATHS_KEY, "/test/.file_100.log",
            HUDI_MAX_COMMIT_TIME_KEY, "100");
    assertTrue(shouldUseRecordReaderFromInputFormat(new Configuration(), realtimeStorage, realtimeSplitInfo));
}
Also used : HoodieRealtimeFileSplit(org.apache.hudi.hadoop.realtime.HoodieRealtimeFileSplit) Storage(com.facebook.presto.hive.metastore.Storage) Configuration(org.apache.hadoop.conf.Configuration) StorageFormat(com.facebook.presto.hive.metastore.StorageFormat) IntString(org.apache.hadoop.hive.serde2.thrift.test.IntString) Test(org.testng.annotations.Test)

Aggregations

Storage (com.facebook.presto.hive.metastore.Storage)19 SchemaTableName (com.facebook.presto.spi.SchemaTableName)12 Column (com.facebook.presto.hive.metastore.Column)9 Configuration (org.apache.hadoop.conf.Configuration)9 Test (org.testng.annotations.Test)9 ConnectorSession (com.facebook.presto.spi.ConnectorSession)8 PrestoException (com.facebook.presto.spi.PrestoException)8 List (java.util.List)8 Objects.requireNonNull (java.util.Objects.requireNonNull)8 Optional (java.util.Optional)8 Path (org.apache.hadoop.fs.Path)8 StorageFormat (com.facebook.presto.hive.metastore.StorageFormat)7 ImmutableList (com.google.common.collect.ImmutableList)7 ImmutableList.toImmutableList (com.google.common.collect.ImmutableList.toImmutableList)7 IOException (java.io.IOException)7 TupleDomain (com.facebook.presto.common.predicate.TupleDomain)6 ConnectorPageSource (com.facebook.presto.spi.ConnectorPageSource)6 DateTimeZone (org.joda.time.DateTimeZone)6 CacheConfig (com.facebook.presto.cache.CacheConfig)5 Domain (com.facebook.presto.common.predicate.Domain)5