Search in sources :

Example 1 with HiveTableProperties.getHiveStorageFormat

use of io.prestosql.plugin.hive.HiveTableProperties.getHiveStorageFormat in project boostkit-bigdata by kunpengcompute.

the class HiveMetadata method getEmptyTableProperties.

protected Map<String, String> getEmptyTableProperties(ConnectorTableMetadata tableMetadata, Optional<HiveBucketProperty> bucketProperty, HdfsContext hdfsContext) {
    HiveStorageFormat hiveStorageFormat = HiveTableProperties.getHiveStorageFormat(tableMetadata.getProperties());
    ImmutableMap.Builder<String, String> tableProperties = ImmutableMap.builder();
    bucketProperty.ifPresent(hiveBucketProperty -> tableProperties.put(BUCKETING_VERSION, Integer.toString(hiveBucketProperty.getBucketingVersion().getVersion())));
    // ORC format specific properties
    if (getTransactionalValue(tableMetadata.getProperties())) {
        if (!hiveStorageFormat.equals(HiveStorageFormat.ORC)) {
            // only ORC storage format support ACID
            throw new PrestoException(NOT_SUPPORTED, "Only ORC storage format supports creating transactional table.");
        }
        // set transactional property.
        tableProperties.put(TRANSACTIONAL, Boolean.toString(getTransactionalValue(tableMetadata.getProperties())));
    }
    List<String> columns = HiveTableProperties.getOrcBloomFilterColumns(tableMetadata.getProperties());
    if (columns != null && !columns.isEmpty()) {
        checkFormatForProperty(hiveStorageFormat, HiveStorageFormat.ORC, HiveTableProperties.ORC_BLOOM_FILTER_COLUMNS);
        tableProperties.put(ORC_BLOOM_FILTER_COLUMNS_KEY, Joiner.on(",").join(columns));
        tableProperties.put(ORC_BLOOM_FILTER_FPP_KEY, String.valueOf(HiveTableProperties.getOrcBloomFilterFpp(tableMetadata.getProperties())));
    }
    // Avro specific properties
    String avroSchemaUrl = HiveTableProperties.getAvroSchemaUrl(tableMetadata.getProperties());
    if (avroSchemaUrl != null) {
        checkFormatForProperty(hiveStorageFormat, HiveStorageFormat.AVRO, HiveTableProperties.AVRO_SCHEMA_URL);
        tableProperties.put(AVRO_SCHEMA_URL_KEY, validateAndNormalizeAvroSchemaUrl(avroSchemaUrl, hdfsContext));
    }
    // Textfile specific properties
    HiveTableProperties.getTextHeaderSkipCount(tableMetadata.getProperties()).ifPresent(headerSkipCount -> {
        if (headerSkipCount > 0) {
            checkFormatForProperty(hiveStorageFormat, HiveStorageFormat.TEXTFILE, HiveTableProperties.TEXTFILE_SKIP_HEADER_LINE_COUNT);
            tableProperties.put(TEXT_SKIP_HEADER_COUNT_KEY, String.valueOf(headerSkipCount));
        }
        if (headerSkipCount < 0) {
            throw new PrestoException(HiveErrorCode.HIVE_INVALID_METADATA, String.format("Invalid value for %s property: %s", HiveTableProperties.TEXTFILE_SKIP_HEADER_LINE_COUNT, headerSkipCount));
        }
    });
    HiveTableProperties.getTextFooterSkipCount(tableMetadata.getProperties()).ifPresent(footerSkipCount -> {
        if (footerSkipCount > 0) {
            checkFormatForProperty(hiveStorageFormat, HiveStorageFormat.TEXTFILE, HiveTableProperties.TEXTFILE_SKIP_FOOTER_LINE_COUNT);
            tableProperties.put(TEXT_SKIP_FOOTER_COUNT_KEY, String.valueOf(footerSkipCount));
        }
        if (footerSkipCount < 0) {
            throw new PrestoException(HiveErrorCode.HIVE_INVALID_METADATA, String.format("Invalid value for %s property: %s", HiveTableProperties.TEXTFILE_SKIP_FOOTER_LINE_COUNT, footerSkipCount));
        }
    });
    // CSV specific properties
    HiveTableProperties.getCsvProperty(tableMetadata.getProperties(), HiveTableProperties.CSV_ESCAPE).ifPresent(escape -> {
        checkFormatForProperty(hiveStorageFormat, HiveStorageFormat.CSV, HiveTableProperties.CSV_ESCAPE);
        tableProperties.put(CSV_ESCAPE_KEY, escape.toString());
    });
    HiveTableProperties.getCsvProperty(tableMetadata.getProperties(), HiveTableProperties.CSV_QUOTE).ifPresent(quote -> {
        checkFormatForProperty(hiveStorageFormat, HiveStorageFormat.CSV, HiveTableProperties.CSV_QUOTE);
        tableProperties.put(CSV_QUOTE_KEY, quote.toString());
    });
    HiveTableProperties.getCsvProperty(tableMetadata.getProperties(), HiveTableProperties.CSV_SEPARATOR).ifPresent(separator -> {
        checkFormatForProperty(hiveStorageFormat, HiveStorageFormat.CSV, HiveTableProperties.CSV_SEPARATOR);
        tableProperties.put(CSV_SEPARATOR_KEY, separator.toString());
    });
    // Table comment property
    tableMetadata.getComment().ifPresent(value -> tableProperties.put(TABLE_COMMENT, value));
    // field delimiter
    Object fieldDelimit = tableMetadata.getProperties().get(FIELD_DELIM);
    if (hiveStorageFormat.equals(HiveStorageFormat.MULTIDELIMIT) && fieldDelimit == null) {
        throw new PrestoException(INVALID_TABLE_PROPERTY, "This table does not have serde property \"field.delim\"!");
    }
    if (fieldDelimit != null) {
        checkFormatForProperty(hiveStorageFormat, HiveStorageFormat.MULTIDELIMIT, FIELD_DELIM);
        tableProperties.put(FIELD_DELIM, fieldDelimit.toString());
    }
    return tableProperties.build();
}
Also used : HiveTableProperties.getHiveStorageFormat(io.prestosql.plugin.hive.HiveTableProperties.getHiveStorageFormat) PrestoException(io.prestosql.spi.PrestoException) ImmutableMap.toImmutableMap(com.google.common.collect.ImmutableMap.toImmutableMap) ImmutableMap(com.google.common.collect.ImmutableMap)

Example 2 with HiveTableProperties.getHiveStorageFormat

use of io.prestosql.plugin.hive.HiveTableProperties.getHiveStorageFormat in project hetu-core by openlookeng.

the class HiveMetadata method createTable.

@Override
public void createTable(ConnectorSession session, ConnectorTableMetadata tableMetadata, boolean ignoreExisting) {
    SchemaTableName schemaTableName = tableMetadata.getTable();
    String schemaName = schemaTableName.getSchemaName();
    String tableName = schemaTableName.getTableName();
    List<String> partitionedBy = getPartitionedBy(tableMetadata.getProperties());
    Optional<HiveBucketProperty> bucketProperty = HiveTableProperties.getBucketProperty(tableMetadata.getProperties());
    if ((bucketProperty.isPresent() || !partitionedBy.isEmpty()) && HiveTableProperties.getAvroSchemaUrl(tableMetadata.getProperties()) != null) {
        throw new PrestoException(NOT_SUPPORTED, "Bucketing/Partitioning columns not supported when Avro schema url is set");
    }
    List<HiveColumnHandle> columnHandles = getColumnHandles(tableMetadata, ImmutableSet.copyOf(partitionedBy), typeTranslator);
    HiveStorageFormat hiveStorageFormat = HiveTableProperties.getHiveStorageFormat(tableMetadata.getProperties());
    Map<String, String> tableProperties = getEmptyTableProperties(tableMetadata, bucketProperty, new HdfsContext(session, schemaName, tableName));
    hiveStorageFormat.validateColumns(columnHandles);
    Map<String, HiveColumnHandle> columnHandlesByName = Maps.uniqueIndex(columnHandles, HiveColumnHandle::getName);
    List<Column> partitionColumns = partitionedBy.stream().map(columnHandlesByName::get).map(column -> new Column(column.getName(), column.getHiveType(), column.getComment())).collect(toList());
    checkPartitionTypesSupported(partitionColumns);
    boolean external = isExternalTable(tableMetadata.getProperties());
    String externalLocation = getExternalLocation(tableMetadata.getProperties());
    if ((external || (externalLocation != null)) && !createsOfNonManagedTablesEnabled) {
        throw new PrestoException(NOT_SUPPORTED, "Cannot create non-managed Hive table");
    }
    Path targetPath;
    Optional<String> location = getLocation(tableMetadata.getProperties());
    // User specifies the location property
    if (location.isPresent()) {
        if (!tableCreatesWithLocationAllowed) {
            throw new PrestoException(NOT_SUPPORTED, format("Setting %s property is not allowed", LOCATION_PROPERTY));
        }
        targetPath = getPath(new HdfsContext(session, schemaName, tableName), location.get(), external);
    } else {
        // User specifies external property, but location property is absent
        if (external) {
            throw new PrestoException(NOT_SUPPORTED, format("Cannot create external Hive table without location. Set it through '%s' property", LOCATION_PROPERTY));
        }
        // User specifies the external location property
        if (externalLocation != null) {
            external = true;
            targetPath = getPath(new HdfsContext(session, schemaName, tableName), externalLocation, true);
        } else // Default option
        {
            external = false;
            LocationHandle locationHandle = locationService.forNewTable(metastore, session, schemaName, tableName, Optional.empty(), Optional.empty(), HiveWriteUtils.OpertionType.CREATE_TABLE);
            targetPath = locationService.getQueryWriteInfo(locationHandle).getTargetPath();
        }
    }
    Table table = buildTableObject(session.getQueryId(), schemaName, tableName, session.getUser(), columnHandles, hiveStorageFormat, partitionedBy, bucketProperty, tableProperties, targetPath, external, prestoVersion);
    PrincipalPrivileges principalPrivileges = MetastoreUtil.buildInitialPrivilegeSet(table.getOwner());
    HiveBasicStatistics basicStatistics = table.getPartitionColumns().isEmpty() ? HiveBasicStatistics.createZeroStatistics() : HiveBasicStatistics.createEmptyStatistics();
    metastore.createTable(session, table, principalPrivileges, Optional.empty(), ignoreExisting, new PartitionStatistics(basicStatistics, ImmutableMap.of()));
}
Also used : TableStatistics(io.prestosql.spi.statistics.TableStatistics) StorageFormat(io.prestosql.plugin.hive.metastore.StorageFormat) PartialAndFinalAggregationType(io.prestosql.spi.PartialAndFinalAggregationType) FIELD_DELIM(org.apache.hadoop.hive.serde.serdeConstants.FIELD_DELIM) HiveUtil.verifyPartitionTypeSupported(io.prestosql.plugin.hive.HiveUtil.verifyPartitionTypeSupported) FileSystem(org.apache.hadoop.fs.FileSystem) HIVE_FILESYSTEM_ERROR(io.prestosql.plugin.hive.HiveErrorCode.HIVE_FILESYSTEM_ERROR) HiveUtil.hiveColumnHandles(io.prestosql.plugin.hive.HiveUtil.hiveColumnHandles) MetastoreUtil(io.prestosql.plugin.hive.metastore.MetastoreUtil) TableAlreadyExistsException(io.prestosql.spi.connector.TableAlreadyExistsException) NullableValue(io.prestosql.spi.predicate.NullableValue) RoleGrant(io.prestosql.spi.security.RoleGrant) ConnectorVacuumTableHandle(io.prestosql.spi.connector.ConnectorVacuumTableHandle) FileStatus(org.apache.hadoop.fs.FileStatus) SCHEMA_NOT_EMPTY(io.prestosql.spi.StandardErrorCode.SCHEMA_NOT_EMPTY) HiveUtil.getPartitionKeyColumnHandles(io.prestosql.plugin.hive.HiveUtil.getPartitionKeyColumnHandles) ConnectorDeleteAsInsertTableHandle(io.prestosql.spi.connector.ConnectorDeleteAsInsertTableHandle) Future(java.util.concurrent.Future) TableNotFoundException(io.prestosql.spi.connector.TableNotFoundException) BucketingVersion(io.prestosql.plugin.hive.HiveBucketing.BucketingVersion) ConnectorUpdateTableHandle(io.prestosql.spi.connector.ConnectorUpdateTableHandle) Map(java.util.Map) HiveTableProperties.getPartitionedBy(io.prestosql.plugin.hive.HiveTableProperties.getPartitionedBy) ENGLISH(java.util.Locale.ENGLISH) ConstraintApplicationResult(io.prestosql.spi.connector.ConstraintApplicationResult) SystemTable(io.prestosql.spi.connector.SystemTable) GrantInfo(io.prestosql.spi.security.GrantInfo) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) org.apache.hadoop.hive.serde.serdeConstants(org.apache.hadoop.hive.serde.serdeConstants) TableStatisticsMetadata(io.prestosql.spi.statistics.TableStatisticsMetadata) Set(java.util.Set) LOCATION_PROPERTY(io.prestosql.plugin.hive.HiveTableProperties.LOCATION_PROPERTY) HiveTableProperties.getTransactionalValue(io.prestosql.plugin.hive.HiveTableProperties.getTransactionalValue) MANAGED_TABLE(org.apache.hadoop.hive.metastore.TableType.MANAGED_TABLE) Collectors.joining(java.util.stream.Collectors.joining) ImmutableMap.toImmutableMap(com.google.common.collect.ImmutableMap.toImmutableMap) Stream(java.util.stream.Stream) Table(io.prestosql.plugin.hive.metastore.Table) Privilege(io.prestosql.spi.security.Privilege) INVALID_TABLE_PROPERTY(io.prestosql.spi.StandardErrorCode.INVALID_TABLE_PROPERTY) HiveTableProperties.isExternalTable(io.prestosql.plugin.hive.HiveTableProperties.isExternalTable) Domain(io.prestosql.spi.predicate.Domain) GENERIC_INTERNAL_ERROR(io.prestosql.spi.StandardErrorCode.GENERIC_INTERNAL_ERROR) AccessControlMetadata(io.prestosql.plugin.hive.security.AccessControlMetadata) ColumnStatisticMetadata(io.prestosql.spi.statistics.ColumnStatisticMetadata) SortingColumn(io.prestosql.plugin.hive.metastore.SortingColumn) ConnectorVacuumTableInfo(io.prestosql.spi.connector.ConnectorVacuumTableInfo) ORC(io.prestosql.plugin.hive.HiveStorageFormat.ORC) SchemaTablePrefix(io.prestosql.spi.connector.SchemaTablePrefix) Joiner(com.google.common.base.Joiner) Iterables(com.google.common.collect.Iterables) Database(io.prestosql.plugin.hive.metastore.Database) Slice(io.airlift.slice.Slice) Partition(io.prestosql.plugin.hive.metastore.Partition) TRANSACTIONAL(io.prestosql.plugin.hive.HiveTableProperties.TRANSACTIONAL) HiveWriterFactory.getSnapshotSubFileIndex(io.prestosql.plugin.hive.HiveWriterFactory.getSnapshotSubFileIndex) Supplier(java.util.function.Supplier) ComputedStatistics(io.prestosql.spi.statistics.ComputedStatistics) ArrayList(java.util.ArrayList) HiveUtil.decodeViewData(io.prestosql.plugin.hive.HiveUtil.decodeViewData) OptionalLong(java.util.OptionalLong) TupleDomain.withColumnDomains(io.prestosql.spi.predicate.TupleDomain.withColumnDomains) ScheduledExecutorService(java.util.concurrent.ScheduledExecutorService) ConnectorPartitioningHandle(io.prestosql.spi.connector.ConnectorPartitioningHandle) ImmutableSet.toImmutableSet(com.google.common.collect.ImmutableSet.toImmutableSet) RecordCursor(io.prestosql.spi.connector.RecordCursor) DiscretePredicates(io.prestosql.spi.connector.DiscretePredicates) SemiTransactionalHiveMetastore(io.prestosql.plugin.hive.metastore.SemiTransactionalHiveMetastore) HiveWriterFactory.removeSnapshotFileName(io.prestosql.plugin.hive.HiveWriterFactory.removeSnapshotFileName) ImmutableSortedMap(com.google.common.collect.ImmutableSortedMap) ConnectorOutputTableHandle(io.prestosql.spi.connector.ConnectorOutputTableHandle) Properties(java.util.Properties) HiveUtil.isPrestoView(io.prestosql.plugin.hive.HiveUtil.isPrestoView) TypeManager(io.prestosql.spi.type.TypeManager) IOException(java.io.IOException) USER(io.prestosql.spi.security.PrincipalType.USER) PrincipalPrivileges(io.prestosql.plugin.hive.metastore.PrincipalPrivileges) ConnectorTableMetadata(io.prestosql.spi.connector.ConnectorTableMetadata) File(java.io.File) ExecutionException(java.util.concurrent.ExecutionException) Streams.stream(com.google.common.collect.Streams.stream) IS_EXTERNAL_TABLE(io.prestosql.plugin.hive.HiveTableProperties.IS_EXTERNAL_TABLE) HiveColumnStatistics(io.prestosql.plugin.hive.metastore.HiveColumnStatistics) ColumnHandle(io.prestosql.spi.connector.ColumnHandle) ConnectorTablePartitioning(io.prestosql.spi.connector.ConnectorTablePartitioning) TableType(org.apache.hadoop.hive.metastore.TableType) ConfigurationUtils(io.prestosql.plugin.hive.util.ConfigurationUtils) PrestoPrincipal(io.prestosql.spi.security.PrestoPrincipal) HiveWriterFactory.isSnapshotFile(io.prestosql.plugin.hive.HiveWriterFactory.isSnapshotFile) VarcharType(io.prestosql.spi.type.VarcharType) HiveTableProperties.getLocation(io.prestosql.plugin.hive.HiveTableProperties.getLocation) ConnectorMetadata(io.prestosql.spi.connector.ConnectorMetadata) Statistics(io.prestosql.plugin.hive.util.Statistics) URL(java.net.URL) HdfsContext(io.prestosql.plugin.hive.HdfsEnvironment.HdfsContext) EXTERNAL_TABLE(org.apache.hadoop.hive.metastore.TableType.EXTERNAL_TABLE) ViewNotFoundException(io.prestosql.spi.connector.ViewNotFoundException) HiveTableProperties.getHiveStorageFormat(io.prestosql.plugin.hive.HiveTableProperties.getHiveStorageFormat) Duration(io.airlift.units.Duration) TableStatisticType(io.prestosql.spi.statistics.TableStatisticType) FileSinkOperator(org.apache.hadoop.hive.ql.exec.FileSinkOperator) Preconditions.checkArgument(com.google.common.base.Preconditions.checkArgument) INVALID_ANALYZE_PROPERTY(io.prestosql.spi.StandardErrorCode.INVALID_ANALYZE_PROPERTY) ConnectorSession(io.prestosql.spi.connector.ConnectorSession) Collectors.toMap(java.util.stream.Collectors.toMap) ConnectorTableProperties(io.prestosql.spi.connector.ConnectorTableProperties) Iterables.concat(com.google.common.collect.Iterables.concat) Path(org.apache.hadoop.fs.Path) Type(io.prestosql.spi.type.Type) Splitter(com.google.common.base.Splitter) BIGINT(io.prestosql.spi.type.BigintType.BIGINT) Collectors.toSet(java.util.stream.Collectors.toSet) Constraint(io.prestosql.spi.connector.Constraint) PrestoException(io.prestosql.spi.PrestoException) ImmutableSet(com.google.common.collect.ImmutableSet) HiveWriteUtils.isS3FileSystem(io.prestosql.plugin.hive.HiveWriteUtils.isS3FileSystem) ImmutableMap(com.google.common.collect.ImmutableMap) Predicate(java.util.function.Predicate) HiveUtil.columnExtraInfo(io.prestosql.plugin.hive.HiveUtil.columnExtraInfo) Collections.emptyList(java.util.Collections.emptyList) Collection(java.util.Collection) HiveUtil.encodeViewData(io.prestosql.plugin.hive.HiveUtil.encodeViewData) HiveWriterFactory.isSnapshotSubFile(io.prestosql.plugin.hive.HiveWriterFactory.isSnapshotSubFile) ROW_COUNT(io.prestosql.spi.statistics.TableStatisticType.ROW_COUNT) Collectors(java.util.stream.Collectors) Sets(com.google.common.collect.Sets) String.format(java.lang.String.format) List(java.util.List) PRESTO_VIEW_FLAG(io.prestosql.plugin.hive.HiveUtil.PRESTO_VIEW_FLAG) VarcharType.createUnboundedVarcharType(io.prestosql.spi.type.VarcharType.createUnboundedVarcharType) Function.identity(java.util.function.Function.identity) ConnectorTransactionHandle(io.prestosql.spi.connector.ConnectorTransactionHandle) Optional(java.util.Optional) MoreObjects.firstNonNull(com.google.common.base.MoreObjects.firstNonNull) NOT_SUPPORTED(io.prestosql.spi.StandardErrorCode.NOT_SUPPORTED) AcidUtils(org.apache.hadoop.hive.ql.io.AcidUtils) HiveStatisticsProvider(io.prestosql.plugin.hive.statistics.HiveStatisticsProvider) JsonCodec(io.airlift.json.JsonCodec) IntStream(java.util.stream.IntStream) ConnectorOutputMetadata(io.prestosql.spi.connector.ConnectorOutputMetadata) Logger(io.airlift.log.Logger) ConnectorViewDefinition(io.prestosql.spi.connector.ConnectorViewDefinition) ConnectorNewTableLayout(io.prestosql.spi.connector.ConnectorNewTableLayout) HashMap(java.util.HashMap) HiveUtil.toPartitionValues(io.prestosql.plugin.hive.HiveUtil.toPartitionValues) HivePrincipal(io.prestosql.plugin.hive.metastore.HivePrincipal) OptionalInt(java.util.OptionalInt) Function(java.util.function.Function) ColumnStatisticType(io.prestosql.spi.statistics.ColumnStatisticType) InMemoryRecordSet(io.prestosql.spi.connector.InMemoryRecordSet) HashSet(java.util.HashSet) HiveTableProperties.getExternalLocation(io.prestosql.plugin.hive.HiveTableProperties.getExternalLocation) SchemaTableName(io.prestosql.spi.connector.SchemaTableName) OpenCSVSerde(org.apache.hadoop.hive.serde2.OpenCSVSerde) INVALID_SCHEMA_PROPERTY(io.prestosql.spi.StandardErrorCode.INVALID_SCHEMA_PROPERTY) ImmutableList(com.google.common.collect.ImmutableList) Verify.verify(com.google.common.base.Verify.verify) Objects.requireNonNull(java.util.Objects.requireNonNull) Suppliers(com.google.common.base.Suppliers) NoSuchElementException(java.util.NoSuchElementException) Block(io.prestosql.spi.block.Block) VerifyException(com.google.common.base.VerifyException) Collections.emptyMap(java.util.Collections.emptyMap) HiveIdentity(io.prestosql.plugin.hive.authentication.HiveIdentity) MalformedURLException(java.net.MalformedURLException) ColumnMetadata(io.prestosql.spi.connector.ColumnMetadata) ConnectorTableHandle(io.prestosql.spi.connector.ConnectorTableHandle) TupleDomain(io.prestosql.spi.predicate.TupleDomain) NON_INHERITABLE_PROPERTIES(io.prestosql.plugin.hive.HiveTableProperties.NON_INHERITABLE_PROPERTIES) Maps(com.google.common.collect.Maps) PRIMITIVE(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category.PRIMITIVE) JobConf(org.apache.hadoop.mapred.JobConf) Collectors.toList(java.util.stream.Collectors.toList) Column(io.prestosql.plugin.hive.metastore.Column) GENERIC_USER_ERROR(io.prestosql.spi.StandardErrorCode.GENERIC_USER_ERROR) VisibleForTesting(com.google.common.annotations.VisibleForTesting) Comparator(java.util.Comparator) ConnectorInsertTableHandle(io.prestosql.spi.connector.ConnectorInsertTableHandle) HiveBucketing.bucketedOnTimestamp(io.prestosql.plugin.hive.HiveBucketing.bucketedOnTimestamp) Path(org.apache.hadoop.fs.Path) SystemTable(io.prestosql.spi.connector.SystemTable) Table(io.prestosql.plugin.hive.metastore.Table) HiveTableProperties.isExternalTable(io.prestosql.plugin.hive.HiveTableProperties.isExternalTable) PrincipalPrivileges(io.prestosql.plugin.hive.metastore.PrincipalPrivileges) PrestoException(io.prestosql.spi.PrestoException) SchemaTableName(io.prestosql.spi.connector.SchemaTableName) HiveTableProperties.getHiveStorageFormat(io.prestosql.plugin.hive.HiveTableProperties.getHiveStorageFormat) SortingColumn(io.prestosql.plugin.hive.metastore.SortingColumn) Column(io.prestosql.plugin.hive.metastore.Column) HdfsContext(io.prestosql.plugin.hive.HdfsEnvironment.HdfsContext)

Example 3 with HiveTableProperties.getHiveStorageFormat

use of io.prestosql.plugin.hive.HiveTableProperties.getHiveStorageFormat in project hetu-core by openlookeng.

the class HiveMetadata method beginCreateTable.

@Override
public HiveOutputTableHandle beginCreateTable(ConnectorSession session, ConnectorTableMetadata tableMetadata, Optional<ConnectorNewTableLayout> layout) {
    if (getExternalLocation(tableMetadata.getProperties()) != null || isExternalTable(tableMetadata.getProperties())) {
        throw new PrestoException(NOT_SUPPORTED, "External tables cannot be created using CREATE TABLE AS");
    }
    if (HiveTableProperties.getAvroSchemaUrl(tableMetadata.getProperties()) != null) {
        throw new PrestoException(NOT_SUPPORTED, "CREATE TABLE AS not supported when Avro schema url is set");
    }
    HiveStorageFormat tableStorageFormat = HiveTableProperties.getHiveStorageFormat(tableMetadata.getProperties());
    List<String> partitionedBy = getPartitionedBy(tableMetadata.getProperties());
    Optional<HiveBucketProperty> bucketProperty = HiveTableProperties.getBucketProperty(tableMetadata.getProperties());
    // get the root directory for the database
    SchemaTableName schemaTableName = tableMetadata.getTable();
    String schemaName = schemaTableName.getSchemaName();
    String tableName = schemaTableName.getTableName();
    Map<String, String> tableProperties = getEmptyTableProperties(tableMetadata, bucketProperty, new HdfsContext(session, schemaName, tableName));
    List<HiveColumnHandle> columnHandles = getColumnHandles(tableMetadata, ImmutableSet.copyOf(partitionedBy), typeTranslator);
    HiveStorageFormat partitionStorageFormat = HiveSessionProperties.isRespectTableFormat(session) ? tableStorageFormat : HiveSessionProperties.getHiveStorageFormat(session);
    // unpartitioned tables ignore the partition storage format
    HiveStorageFormat actualStorageFormat = partitionedBy.isEmpty() ? tableStorageFormat : partitionStorageFormat;
    actualStorageFormat.validateColumns(columnHandles);
    Map<String, HiveColumnHandle> columnHandlesByName = Maps.uniqueIndex(columnHandles, HiveColumnHandle::getName);
    List<Column> partitionColumns = partitionedBy.stream().map(columnHandlesByName::get).map(column -> new Column(column.getName(), column.getHiveType(), column.getComment())).collect(toList());
    checkPartitionTypesSupported(partitionColumns);
    Optional<String> location = getLocation(tableMetadata.getProperties());
    if (location.isPresent() && !tableCreatesWithLocationAllowed) {
        throw new PrestoException(NOT_SUPPORTED, format("Setting %s property is not allowed", LOCATION_PROPERTY));
    }
    Optional<WriteIdInfo> writeIdInfo = Optional.empty();
    if (AcidUtils.isTransactionalTable(tableProperties)) {
        // Create the HiveTableHandle for just to obtain writeIds.
        List<HiveColumnHandle> partitionColumnHandles = partitionedBy.stream().map(columnHandlesByName::get).collect(toList());
        HiveTableHandle tableHandle = new HiveTableHandle(schemaName, tableName, tableProperties, partitionColumnHandles, Optional.empty());
        Optional<Long> writeId = metastore.getTableWriteId(session, tableHandle, HiveACIDWriteType.INSERT);
        if (!writeId.isPresent()) {
            throw new IllegalStateException("No validWriteIds present");
        }
        writeIdInfo = Optional.of(new WriteIdInfo(writeId.get(), writeId.get(), 0));
    }
    LocationHandle locationHandle;
    if (location.isPresent()) {
        Path path = getPath(new HdfsContext(session, schemaName, tableName), location.get(), false);
        locationHandle = locationService.forNewTable(metastore, session, schemaName, tableName, writeIdInfo, Optional.of(path), HiveWriteUtils.OpertionType.CREATE_TABLE_AS);
    } else {
        locationHandle = locationService.forNewTable(metastore, session, schemaName, tableName, writeIdInfo, Optional.empty(), HiveWriteUtils.OpertionType.CREATE_TABLE_AS);
    }
    HiveOutputTableHandle result = new HiveOutputTableHandle(schemaName, tableName, columnHandles, metastore.generatePageSinkMetadata(new HiveIdentity(session), schemaTableName), locationHandle, tableStorageFormat, partitionStorageFormat, partitionedBy, bucketProperty, session.getUser(), tableProperties);
    LocationService.WriteInfo writeInfo = locationService.getQueryWriteInfo(locationHandle);
    metastore.declareIntentionToWrite(session, writeInfo.getWriteMode(), writeInfo.getWritePath(), schemaTableName);
    return result;
}
Also used : TableStatistics(io.prestosql.spi.statistics.TableStatistics) StorageFormat(io.prestosql.plugin.hive.metastore.StorageFormat) PartialAndFinalAggregationType(io.prestosql.spi.PartialAndFinalAggregationType) FIELD_DELIM(org.apache.hadoop.hive.serde.serdeConstants.FIELD_DELIM) HiveUtil.verifyPartitionTypeSupported(io.prestosql.plugin.hive.HiveUtil.verifyPartitionTypeSupported) FileSystem(org.apache.hadoop.fs.FileSystem) HIVE_FILESYSTEM_ERROR(io.prestosql.plugin.hive.HiveErrorCode.HIVE_FILESYSTEM_ERROR) HiveUtil.hiveColumnHandles(io.prestosql.plugin.hive.HiveUtil.hiveColumnHandles) MetastoreUtil(io.prestosql.plugin.hive.metastore.MetastoreUtil) TableAlreadyExistsException(io.prestosql.spi.connector.TableAlreadyExistsException) NullableValue(io.prestosql.spi.predicate.NullableValue) RoleGrant(io.prestosql.spi.security.RoleGrant) ConnectorVacuumTableHandle(io.prestosql.spi.connector.ConnectorVacuumTableHandle) FileStatus(org.apache.hadoop.fs.FileStatus) SCHEMA_NOT_EMPTY(io.prestosql.spi.StandardErrorCode.SCHEMA_NOT_EMPTY) HiveUtil.getPartitionKeyColumnHandles(io.prestosql.plugin.hive.HiveUtil.getPartitionKeyColumnHandles) ConnectorDeleteAsInsertTableHandle(io.prestosql.spi.connector.ConnectorDeleteAsInsertTableHandle) Future(java.util.concurrent.Future) TableNotFoundException(io.prestosql.spi.connector.TableNotFoundException) BucketingVersion(io.prestosql.plugin.hive.HiveBucketing.BucketingVersion) ConnectorUpdateTableHandle(io.prestosql.spi.connector.ConnectorUpdateTableHandle) Map(java.util.Map) HiveTableProperties.getPartitionedBy(io.prestosql.plugin.hive.HiveTableProperties.getPartitionedBy) ENGLISH(java.util.Locale.ENGLISH) ConstraintApplicationResult(io.prestosql.spi.connector.ConstraintApplicationResult) SystemTable(io.prestosql.spi.connector.SystemTable) GrantInfo(io.prestosql.spi.security.GrantInfo) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) org.apache.hadoop.hive.serde.serdeConstants(org.apache.hadoop.hive.serde.serdeConstants) TableStatisticsMetadata(io.prestosql.spi.statistics.TableStatisticsMetadata) Set(java.util.Set) LOCATION_PROPERTY(io.prestosql.plugin.hive.HiveTableProperties.LOCATION_PROPERTY) HiveTableProperties.getTransactionalValue(io.prestosql.plugin.hive.HiveTableProperties.getTransactionalValue) MANAGED_TABLE(org.apache.hadoop.hive.metastore.TableType.MANAGED_TABLE) Collectors.joining(java.util.stream.Collectors.joining) ImmutableMap.toImmutableMap(com.google.common.collect.ImmutableMap.toImmutableMap) Stream(java.util.stream.Stream) Table(io.prestosql.plugin.hive.metastore.Table) Privilege(io.prestosql.spi.security.Privilege) INVALID_TABLE_PROPERTY(io.prestosql.spi.StandardErrorCode.INVALID_TABLE_PROPERTY) HiveTableProperties.isExternalTable(io.prestosql.plugin.hive.HiveTableProperties.isExternalTable) Domain(io.prestosql.spi.predicate.Domain) GENERIC_INTERNAL_ERROR(io.prestosql.spi.StandardErrorCode.GENERIC_INTERNAL_ERROR) AccessControlMetadata(io.prestosql.plugin.hive.security.AccessControlMetadata) ColumnStatisticMetadata(io.prestosql.spi.statistics.ColumnStatisticMetadata) SortingColumn(io.prestosql.plugin.hive.metastore.SortingColumn) ConnectorVacuumTableInfo(io.prestosql.spi.connector.ConnectorVacuumTableInfo) ORC(io.prestosql.plugin.hive.HiveStorageFormat.ORC) SchemaTablePrefix(io.prestosql.spi.connector.SchemaTablePrefix) Joiner(com.google.common.base.Joiner) Iterables(com.google.common.collect.Iterables) Database(io.prestosql.plugin.hive.metastore.Database) Slice(io.airlift.slice.Slice) Partition(io.prestosql.plugin.hive.metastore.Partition) TRANSACTIONAL(io.prestosql.plugin.hive.HiveTableProperties.TRANSACTIONAL) HiveWriterFactory.getSnapshotSubFileIndex(io.prestosql.plugin.hive.HiveWriterFactory.getSnapshotSubFileIndex) Supplier(java.util.function.Supplier) ComputedStatistics(io.prestosql.spi.statistics.ComputedStatistics) ArrayList(java.util.ArrayList) HiveUtil.decodeViewData(io.prestosql.plugin.hive.HiveUtil.decodeViewData) OptionalLong(java.util.OptionalLong) TupleDomain.withColumnDomains(io.prestosql.spi.predicate.TupleDomain.withColumnDomains) ScheduledExecutorService(java.util.concurrent.ScheduledExecutorService) ConnectorPartitioningHandle(io.prestosql.spi.connector.ConnectorPartitioningHandle) ImmutableSet.toImmutableSet(com.google.common.collect.ImmutableSet.toImmutableSet) RecordCursor(io.prestosql.spi.connector.RecordCursor) DiscretePredicates(io.prestosql.spi.connector.DiscretePredicates) SemiTransactionalHiveMetastore(io.prestosql.plugin.hive.metastore.SemiTransactionalHiveMetastore) HiveWriterFactory.removeSnapshotFileName(io.prestosql.plugin.hive.HiveWriterFactory.removeSnapshotFileName) ImmutableSortedMap(com.google.common.collect.ImmutableSortedMap) ConnectorOutputTableHandle(io.prestosql.spi.connector.ConnectorOutputTableHandle) Properties(java.util.Properties) HiveUtil.isPrestoView(io.prestosql.plugin.hive.HiveUtil.isPrestoView) TypeManager(io.prestosql.spi.type.TypeManager) IOException(java.io.IOException) USER(io.prestosql.spi.security.PrincipalType.USER) PrincipalPrivileges(io.prestosql.plugin.hive.metastore.PrincipalPrivileges) ConnectorTableMetadata(io.prestosql.spi.connector.ConnectorTableMetadata) File(java.io.File) ExecutionException(java.util.concurrent.ExecutionException) Streams.stream(com.google.common.collect.Streams.stream) IS_EXTERNAL_TABLE(io.prestosql.plugin.hive.HiveTableProperties.IS_EXTERNAL_TABLE) HiveColumnStatistics(io.prestosql.plugin.hive.metastore.HiveColumnStatistics) ColumnHandle(io.prestosql.spi.connector.ColumnHandle) ConnectorTablePartitioning(io.prestosql.spi.connector.ConnectorTablePartitioning) TableType(org.apache.hadoop.hive.metastore.TableType) ConfigurationUtils(io.prestosql.plugin.hive.util.ConfigurationUtils) PrestoPrincipal(io.prestosql.spi.security.PrestoPrincipal) HiveWriterFactory.isSnapshotFile(io.prestosql.plugin.hive.HiveWriterFactory.isSnapshotFile) VarcharType(io.prestosql.spi.type.VarcharType) HiveTableProperties.getLocation(io.prestosql.plugin.hive.HiveTableProperties.getLocation) ConnectorMetadata(io.prestosql.spi.connector.ConnectorMetadata) Statistics(io.prestosql.plugin.hive.util.Statistics) URL(java.net.URL) HdfsContext(io.prestosql.plugin.hive.HdfsEnvironment.HdfsContext) EXTERNAL_TABLE(org.apache.hadoop.hive.metastore.TableType.EXTERNAL_TABLE) ViewNotFoundException(io.prestosql.spi.connector.ViewNotFoundException) HiveTableProperties.getHiveStorageFormat(io.prestosql.plugin.hive.HiveTableProperties.getHiveStorageFormat) Duration(io.airlift.units.Duration) TableStatisticType(io.prestosql.spi.statistics.TableStatisticType) FileSinkOperator(org.apache.hadoop.hive.ql.exec.FileSinkOperator) Preconditions.checkArgument(com.google.common.base.Preconditions.checkArgument) INVALID_ANALYZE_PROPERTY(io.prestosql.spi.StandardErrorCode.INVALID_ANALYZE_PROPERTY) ConnectorSession(io.prestosql.spi.connector.ConnectorSession) Collectors.toMap(java.util.stream.Collectors.toMap) ConnectorTableProperties(io.prestosql.spi.connector.ConnectorTableProperties) Iterables.concat(com.google.common.collect.Iterables.concat) Path(org.apache.hadoop.fs.Path) Type(io.prestosql.spi.type.Type) Splitter(com.google.common.base.Splitter) BIGINT(io.prestosql.spi.type.BigintType.BIGINT) Collectors.toSet(java.util.stream.Collectors.toSet) Constraint(io.prestosql.spi.connector.Constraint) PrestoException(io.prestosql.spi.PrestoException) ImmutableSet(com.google.common.collect.ImmutableSet) HiveWriteUtils.isS3FileSystem(io.prestosql.plugin.hive.HiveWriteUtils.isS3FileSystem) ImmutableMap(com.google.common.collect.ImmutableMap) Predicate(java.util.function.Predicate) HiveUtil.columnExtraInfo(io.prestosql.plugin.hive.HiveUtil.columnExtraInfo) Collections.emptyList(java.util.Collections.emptyList) Collection(java.util.Collection) HiveUtil.encodeViewData(io.prestosql.plugin.hive.HiveUtil.encodeViewData) HiveWriterFactory.isSnapshotSubFile(io.prestosql.plugin.hive.HiveWriterFactory.isSnapshotSubFile) ROW_COUNT(io.prestosql.spi.statistics.TableStatisticType.ROW_COUNT) Collectors(java.util.stream.Collectors) Sets(com.google.common.collect.Sets) String.format(java.lang.String.format) List(java.util.List) PRESTO_VIEW_FLAG(io.prestosql.plugin.hive.HiveUtil.PRESTO_VIEW_FLAG) VarcharType.createUnboundedVarcharType(io.prestosql.spi.type.VarcharType.createUnboundedVarcharType) Function.identity(java.util.function.Function.identity) ConnectorTransactionHandle(io.prestosql.spi.connector.ConnectorTransactionHandle) Optional(java.util.Optional) MoreObjects.firstNonNull(com.google.common.base.MoreObjects.firstNonNull) NOT_SUPPORTED(io.prestosql.spi.StandardErrorCode.NOT_SUPPORTED) AcidUtils(org.apache.hadoop.hive.ql.io.AcidUtils) HiveStatisticsProvider(io.prestosql.plugin.hive.statistics.HiveStatisticsProvider) JsonCodec(io.airlift.json.JsonCodec) IntStream(java.util.stream.IntStream) ConnectorOutputMetadata(io.prestosql.spi.connector.ConnectorOutputMetadata) Logger(io.airlift.log.Logger) ConnectorViewDefinition(io.prestosql.spi.connector.ConnectorViewDefinition) ConnectorNewTableLayout(io.prestosql.spi.connector.ConnectorNewTableLayout) HashMap(java.util.HashMap) HiveUtil.toPartitionValues(io.prestosql.plugin.hive.HiveUtil.toPartitionValues) HivePrincipal(io.prestosql.plugin.hive.metastore.HivePrincipal) OptionalInt(java.util.OptionalInt) Function(java.util.function.Function) ColumnStatisticType(io.prestosql.spi.statistics.ColumnStatisticType) InMemoryRecordSet(io.prestosql.spi.connector.InMemoryRecordSet) HashSet(java.util.HashSet) HiveTableProperties.getExternalLocation(io.prestosql.plugin.hive.HiveTableProperties.getExternalLocation) SchemaTableName(io.prestosql.spi.connector.SchemaTableName) OpenCSVSerde(org.apache.hadoop.hive.serde2.OpenCSVSerde) INVALID_SCHEMA_PROPERTY(io.prestosql.spi.StandardErrorCode.INVALID_SCHEMA_PROPERTY) ImmutableList(com.google.common.collect.ImmutableList) Verify.verify(com.google.common.base.Verify.verify) Objects.requireNonNull(java.util.Objects.requireNonNull) Suppliers(com.google.common.base.Suppliers) NoSuchElementException(java.util.NoSuchElementException) Block(io.prestosql.spi.block.Block) VerifyException(com.google.common.base.VerifyException) Collections.emptyMap(java.util.Collections.emptyMap) HiveIdentity(io.prestosql.plugin.hive.authentication.HiveIdentity) MalformedURLException(java.net.MalformedURLException) ColumnMetadata(io.prestosql.spi.connector.ColumnMetadata) ConnectorTableHandle(io.prestosql.spi.connector.ConnectorTableHandle) TupleDomain(io.prestosql.spi.predicate.TupleDomain) NON_INHERITABLE_PROPERTIES(io.prestosql.plugin.hive.HiveTableProperties.NON_INHERITABLE_PROPERTIES) Maps(com.google.common.collect.Maps) PRIMITIVE(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category.PRIMITIVE) JobConf(org.apache.hadoop.mapred.JobConf) Collectors.toList(java.util.stream.Collectors.toList) Column(io.prestosql.plugin.hive.metastore.Column) GENERIC_USER_ERROR(io.prestosql.spi.StandardErrorCode.GENERIC_USER_ERROR) VisibleForTesting(com.google.common.annotations.VisibleForTesting) Comparator(java.util.Comparator) ConnectorInsertTableHandle(io.prestosql.spi.connector.ConnectorInsertTableHandle) HiveBucketing.bucketedOnTimestamp(io.prestosql.plugin.hive.HiveBucketing.bucketedOnTimestamp) PrestoException(io.prestosql.spi.PrestoException) HiveIdentity(io.prestosql.plugin.hive.authentication.HiveIdentity) HiveTableProperties.getHiveStorageFormat(io.prestosql.plugin.hive.HiveTableProperties.getHiveStorageFormat) SortingColumn(io.prestosql.plugin.hive.metastore.SortingColumn) Column(io.prestosql.plugin.hive.metastore.Column) HdfsContext(io.prestosql.plugin.hive.HdfsEnvironment.HdfsContext) Path(org.apache.hadoop.fs.Path) SchemaTableName(io.prestosql.spi.connector.SchemaTableName) OptionalLong(java.util.OptionalLong)

Example 4 with HiveTableProperties.getHiveStorageFormat

use of io.prestosql.plugin.hive.HiveTableProperties.getHiveStorageFormat in project hetu-core by openlookeng.

the class HiveMetadata method getEmptyTableProperties.

protected Map<String, String> getEmptyTableProperties(ConnectorTableMetadata tableMetadata, Optional<HiveBucketProperty> bucketProperty, HdfsContext hdfsContext) {
    HiveStorageFormat hiveStorageFormat = HiveTableProperties.getHiveStorageFormat(tableMetadata.getProperties());
    ImmutableMap.Builder<String, String> tableProperties = ImmutableMap.builder();
    bucketProperty.ifPresent(hiveBucketProperty -> tableProperties.put(BUCKETING_VERSION, Integer.toString(hiveBucketProperty.getBucketingVersion().getVersion())));
    // ORC format specific properties
    if (getTransactionalValue(tableMetadata.getProperties())) {
        if (!hiveStorageFormat.equals(HiveStorageFormat.ORC)) {
            // only ORC storage format support ACID
            throw new PrestoException(NOT_SUPPORTED, "Only ORC storage format supports creating transactional table.");
        }
        // set transactional property.
        tableProperties.put(TRANSACTIONAL, Boolean.toString(getTransactionalValue(tableMetadata.getProperties())));
    }
    List<String> columns = HiveTableProperties.getOrcBloomFilterColumns(tableMetadata.getProperties());
    if (columns != null && !columns.isEmpty()) {
        checkFormatForProperty(hiveStorageFormat, HiveStorageFormat.ORC, HiveTableProperties.ORC_BLOOM_FILTER_COLUMNS);
        tableProperties.put(ORC_BLOOM_FILTER_COLUMNS_KEY, Joiner.on(",").join(columns));
        tableProperties.put(ORC_BLOOM_FILTER_FPP_KEY, String.valueOf(HiveTableProperties.getOrcBloomFilterFpp(tableMetadata.getProperties())));
    }
    // Avro specific properties
    String avroSchemaUrl = HiveTableProperties.getAvroSchemaUrl(tableMetadata.getProperties());
    if (avroSchemaUrl != null) {
        checkFormatForProperty(hiveStorageFormat, HiveStorageFormat.AVRO, HiveTableProperties.AVRO_SCHEMA_URL);
        tableProperties.put(AVRO_SCHEMA_URL_KEY, validateAndNormalizeAvroSchemaUrl(avroSchemaUrl, hdfsContext));
    }
    // Textfile specific properties
    HiveTableProperties.getTextHeaderSkipCount(tableMetadata.getProperties()).ifPresent(headerSkipCount -> {
        if (headerSkipCount > 0) {
            checkFormatForProperty(hiveStorageFormat, HiveStorageFormat.TEXTFILE, HiveTableProperties.TEXTFILE_SKIP_HEADER_LINE_COUNT);
            tableProperties.put(TEXT_SKIP_HEADER_COUNT_KEY, String.valueOf(headerSkipCount));
        }
        if (headerSkipCount < 0) {
            throw new PrestoException(HiveErrorCode.HIVE_INVALID_METADATA, String.format("Invalid value for %s property: %s", HiveTableProperties.TEXTFILE_SKIP_HEADER_LINE_COUNT, headerSkipCount));
        }
    });
    HiveTableProperties.getTextFooterSkipCount(tableMetadata.getProperties()).ifPresent(footerSkipCount -> {
        if (footerSkipCount > 0) {
            checkFormatForProperty(hiveStorageFormat, HiveStorageFormat.TEXTFILE, HiveTableProperties.TEXTFILE_SKIP_FOOTER_LINE_COUNT);
            tableProperties.put(TEXT_SKIP_FOOTER_COUNT_KEY, String.valueOf(footerSkipCount));
        }
        if (footerSkipCount < 0) {
            throw new PrestoException(HiveErrorCode.HIVE_INVALID_METADATA, String.format("Invalid value for %s property: %s", HiveTableProperties.TEXTFILE_SKIP_FOOTER_LINE_COUNT, footerSkipCount));
        }
    });
    // CSV specific properties
    HiveTableProperties.getCsvProperty(tableMetadata.getProperties(), HiveTableProperties.CSV_ESCAPE).ifPresent(escape -> {
        checkFormatForProperty(hiveStorageFormat, HiveStorageFormat.CSV, HiveTableProperties.CSV_ESCAPE);
        tableProperties.put(CSV_ESCAPE_KEY, escape.toString());
    });
    HiveTableProperties.getCsvProperty(tableMetadata.getProperties(), HiveTableProperties.CSV_QUOTE).ifPresent(quote -> {
        checkFormatForProperty(hiveStorageFormat, HiveStorageFormat.CSV, HiveTableProperties.CSV_QUOTE);
        tableProperties.put(CSV_QUOTE_KEY, quote.toString());
    });
    HiveTableProperties.getCsvProperty(tableMetadata.getProperties(), HiveTableProperties.CSV_SEPARATOR).ifPresent(separator -> {
        checkFormatForProperty(hiveStorageFormat, HiveStorageFormat.CSV, HiveTableProperties.CSV_SEPARATOR);
        tableProperties.put(CSV_SEPARATOR_KEY, separator.toString());
    });
    // Table comment property
    tableMetadata.getComment().ifPresent(value -> tableProperties.put(TABLE_COMMENT, value));
    // field delimiter
    Object fieldDelimit = tableMetadata.getProperties().get(FIELD_DELIM);
    if (hiveStorageFormat.equals(HiveStorageFormat.MULTIDELIMIT) && fieldDelimit == null) {
        throw new PrestoException(INVALID_TABLE_PROPERTY, "This table does not have serde property \"field.delim\"!");
    }
    if (fieldDelimit != null) {
        checkFormatForProperty(hiveStorageFormat, HiveStorageFormat.MULTIDELIMIT, FIELD_DELIM);
        tableProperties.put(FIELD_DELIM, fieldDelimit.toString());
    }
    return tableProperties.build();
}
Also used : HiveTableProperties.getHiveStorageFormat(io.prestosql.plugin.hive.HiveTableProperties.getHiveStorageFormat) PrestoException(io.prestosql.spi.PrestoException) ImmutableMap.toImmutableMap(com.google.common.collect.ImmutableMap.toImmutableMap) ImmutableMap(com.google.common.collect.ImmutableMap)

Example 5 with HiveTableProperties.getHiveStorageFormat

use of io.prestosql.plugin.hive.HiveTableProperties.getHiveStorageFormat in project boostkit-bigdata by kunpengcompute.

the class HiveMetadata method beginCreateTable.

@Override
public HiveOutputTableHandle beginCreateTable(ConnectorSession session, ConnectorTableMetadata tableMetadata, Optional<ConnectorNewTableLayout> layout) {
    if (getExternalLocation(tableMetadata.getProperties()) != null || isExternalTable(tableMetadata.getProperties())) {
        throw new PrestoException(NOT_SUPPORTED, "External tables cannot be created using CREATE TABLE AS");
    }
    if (HiveTableProperties.getAvroSchemaUrl(tableMetadata.getProperties()) != null) {
        throw new PrestoException(NOT_SUPPORTED, "CREATE TABLE AS not supported when Avro schema url is set");
    }
    HiveStorageFormat tableStorageFormat = HiveTableProperties.getHiveStorageFormat(tableMetadata.getProperties());
    List<String> partitionedBy = getPartitionedBy(tableMetadata.getProperties());
    Optional<HiveBucketProperty> bucketProperty = HiveTableProperties.getBucketProperty(tableMetadata.getProperties());
    // get the root directory for the database
    SchemaTableName schemaTableName = tableMetadata.getTable();
    String schemaName = schemaTableName.getSchemaName();
    String tableName = schemaTableName.getTableName();
    Map<String, String> tableProperties = getEmptyTableProperties(tableMetadata, bucketProperty, new HdfsContext(session, schemaName, tableName));
    List<HiveColumnHandle> columnHandles = getColumnHandles(tableMetadata, ImmutableSet.copyOf(partitionedBy), typeTranslator);
    HiveStorageFormat partitionStorageFormat = HiveSessionProperties.isRespectTableFormat(session) ? tableStorageFormat : HiveSessionProperties.getHiveStorageFormat(session);
    // unpartitioned tables ignore the partition storage format
    HiveStorageFormat actualStorageFormat = partitionedBy.isEmpty() ? tableStorageFormat : partitionStorageFormat;
    actualStorageFormat.validateColumns(columnHandles);
    Map<String, HiveColumnHandle> columnHandlesByName = Maps.uniqueIndex(columnHandles, HiveColumnHandle::getName);
    List<Column> partitionColumns = partitionedBy.stream().map(columnHandlesByName::get).map(column -> new Column(column.getName(), column.getHiveType(), column.getComment())).collect(toList());
    checkPartitionTypesSupported(partitionColumns);
    Optional<String> location = getLocation(tableMetadata.getProperties());
    if (location.isPresent() && !tableCreatesWithLocationAllowed) {
        throw new PrestoException(NOT_SUPPORTED, format("Setting %s property is not allowed", LOCATION_PROPERTY));
    }
    Optional<WriteIdInfo> writeIdInfo = Optional.empty();
    if (AcidUtils.isTransactionalTable(tableProperties)) {
        // Create the HiveTableHandle for just to obtain writeIds.
        List<HiveColumnHandle> partitionColumnHandles = partitionedBy.stream().map(columnHandlesByName::get).collect(toList());
        HiveTableHandle tableHandle = new HiveTableHandle(schemaName, tableName, tableProperties, partitionColumnHandles, Optional.empty());
        Optional<Long> writeId = metastore.getTableWriteId(session, tableHandle, HiveACIDWriteType.INSERT);
        if (!writeId.isPresent()) {
            throw new IllegalStateException("No validWriteIds present");
        }
        writeIdInfo = Optional.of(new WriteIdInfo(writeId.get(), writeId.get(), 0));
    }
    LocationHandle locationHandle;
    if (location.isPresent()) {
        Path path = getPath(new HdfsContext(session, schemaName, tableName), location.get(), false);
        locationHandle = locationService.forNewTable(metastore, session, schemaName, tableName, writeIdInfo, Optional.of(path), HiveWriteUtils.OpertionType.CREATE_TABLE_AS);
    } else {
        locationHandle = locationService.forNewTable(metastore, session, schemaName, tableName, writeIdInfo, Optional.empty(), HiveWriteUtils.OpertionType.CREATE_TABLE_AS);
    }
    HiveOutputTableHandle result = new HiveOutputTableHandle(schemaName, tableName, columnHandles, metastore.generatePageSinkMetadata(new HiveIdentity(session), schemaTableName), locationHandle, tableStorageFormat, partitionStorageFormat, partitionedBy, bucketProperty, session.getUser(), tableProperties);
    LocationService.WriteInfo writeInfo = locationService.getQueryWriteInfo(locationHandle);
    metastore.declareIntentionToWrite(session, writeInfo.getWriteMode(), writeInfo.getWritePath(), schemaTableName);
    return result;
}
Also used : TableStatistics(io.prestosql.spi.statistics.TableStatistics) StorageFormat(io.prestosql.plugin.hive.metastore.StorageFormat) PartialAndFinalAggregationType(io.prestosql.spi.PartialAndFinalAggregationType) FIELD_DELIM(org.apache.hadoop.hive.serde.serdeConstants.FIELD_DELIM) HiveUtil.verifyPartitionTypeSupported(io.prestosql.plugin.hive.HiveUtil.verifyPartitionTypeSupported) FileSystem(org.apache.hadoop.fs.FileSystem) HIVE_FILESYSTEM_ERROR(io.prestosql.plugin.hive.HiveErrorCode.HIVE_FILESYSTEM_ERROR) HiveUtil.hiveColumnHandles(io.prestosql.plugin.hive.HiveUtil.hiveColumnHandles) MetastoreUtil(io.prestosql.plugin.hive.metastore.MetastoreUtil) TableAlreadyExistsException(io.prestosql.spi.connector.TableAlreadyExistsException) NullableValue(io.prestosql.spi.predicate.NullableValue) RoleGrant(io.prestosql.spi.security.RoleGrant) ConnectorVacuumTableHandle(io.prestosql.spi.connector.ConnectorVacuumTableHandle) FileStatus(org.apache.hadoop.fs.FileStatus) SCHEMA_NOT_EMPTY(io.prestosql.spi.StandardErrorCode.SCHEMA_NOT_EMPTY) HiveUtil.getPartitionKeyColumnHandles(io.prestosql.plugin.hive.HiveUtil.getPartitionKeyColumnHandles) ConnectorDeleteAsInsertTableHandle(io.prestosql.spi.connector.ConnectorDeleteAsInsertTableHandle) Future(java.util.concurrent.Future) TableNotFoundException(io.prestosql.spi.connector.TableNotFoundException) BucketingVersion(io.prestosql.plugin.hive.HiveBucketing.BucketingVersion) ConnectorUpdateTableHandle(io.prestosql.spi.connector.ConnectorUpdateTableHandle) Map(java.util.Map) HiveTableProperties.getPartitionedBy(io.prestosql.plugin.hive.HiveTableProperties.getPartitionedBy) ENGLISH(java.util.Locale.ENGLISH) ConstraintApplicationResult(io.prestosql.spi.connector.ConstraintApplicationResult) SystemTable(io.prestosql.spi.connector.SystemTable) GrantInfo(io.prestosql.spi.security.GrantInfo) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) org.apache.hadoop.hive.serde.serdeConstants(org.apache.hadoop.hive.serde.serdeConstants) TableStatisticsMetadata(io.prestosql.spi.statistics.TableStatisticsMetadata) Set(java.util.Set) LOCATION_PROPERTY(io.prestosql.plugin.hive.HiveTableProperties.LOCATION_PROPERTY) HiveTableProperties.getTransactionalValue(io.prestosql.plugin.hive.HiveTableProperties.getTransactionalValue) MANAGED_TABLE(org.apache.hadoop.hive.metastore.TableType.MANAGED_TABLE) Collectors.joining(java.util.stream.Collectors.joining) ImmutableMap.toImmutableMap(com.google.common.collect.ImmutableMap.toImmutableMap) Stream(java.util.stream.Stream) Table(io.prestosql.plugin.hive.metastore.Table) Privilege(io.prestosql.spi.security.Privilege) INVALID_TABLE_PROPERTY(io.prestosql.spi.StandardErrorCode.INVALID_TABLE_PROPERTY) HiveTableProperties.isExternalTable(io.prestosql.plugin.hive.HiveTableProperties.isExternalTable) Domain(io.prestosql.spi.predicate.Domain) GENERIC_INTERNAL_ERROR(io.prestosql.spi.StandardErrorCode.GENERIC_INTERNAL_ERROR) AccessControlMetadata(io.prestosql.plugin.hive.security.AccessControlMetadata) ColumnStatisticMetadata(io.prestosql.spi.statistics.ColumnStatisticMetadata) SortingColumn(io.prestosql.plugin.hive.metastore.SortingColumn) ConnectorVacuumTableInfo(io.prestosql.spi.connector.ConnectorVacuumTableInfo) ORC(io.prestosql.plugin.hive.HiveStorageFormat.ORC) SchemaTablePrefix(io.prestosql.spi.connector.SchemaTablePrefix) Joiner(com.google.common.base.Joiner) Iterables(com.google.common.collect.Iterables) Database(io.prestosql.plugin.hive.metastore.Database) Slice(io.airlift.slice.Slice) Partition(io.prestosql.plugin.hive.metastore.Partition) TRANSACTIONAL(io.prestosql.plugin.hive.HiveTableProperties.TRANSACTIONAL) HiveWriterFactory.getSnapshotSubFileIndex(io.prestosql.plugin.hive.HiveWriterFactory.getSnapshotSubFileIndex) Supplier(java.util.function.Supplier) ComputedStatistics(io.prestosql.spi.statistics.ComputedStatistics) ArrayList(java.util.ArrayList) HiveUtil.decodeViewData(io.prestosql.plugin.hive.HiveUtil.decodeViewData) OptionalLong(java.util.OptionalLong) TupleDomain.withColumnDomains(io.prestosql.spi.predicate.TupleDomain.withColumnDomains) ScheduledExecutorService(java.util.concurrent.ScheduledExecutorService) ConnectorPartitioningHandle(io.prestosql.spi.connector.ConnectorPartitioningHandle) ImmutableSet.toImmutableSet(com.google.common.collect.ImmutableSet.toImmutableSet) RecordCursor(io.prestosql.spi.connector.RecordCursor) DiscretePredicates(io.prestosql.spi.connector.DiscretePredicates) SemiTransactionalHiveMetastore(io.prestosql.plugin.hive.metastore.SemiTransactionalHiveMetastore) HiveWriterFactory.removeSnapshotFileName(io.prestosql.plugin.hive.HiveWriterFactory.removeSnapshotFileName) ImmutableSortedMap(com.google.common.collect.ImmutableSortedMap) ConnectorOutputTableHandle(io.prestosql.spi.connector.ConnectorOutputTableHandle) Properties(java.util.Properties) HiveUtil.isPrestoView(io.prestosql.plugin.hive.HiveUtil.isPrestoView) TypeManager(io.prestosql.spi.type.TypeManager) IOException(java.io.IOException) USER(io.prestosql.spi.security.PrincipalType.USER) PrincipalPrivileges(io.prestosql.plugin.hive.metastore.PrincipalPrivileges) ConnectorTableMetadata(io.prestosql.spi.connector.ConnectorTableMetadata) File(java.io.File) ExecutionException(java.util.concurrent.ExecutionException) Streams.stream(com.google.common.collect.Streams.stream) IS_EXTERNAL_TABLE(io.prestosql.plugin.hive.HiveTableProperties.IS_EXTERNAL_TABLE) HiveColumnStatistics(io.prestosql.plugin.hive.metastore.HiveColumnStatistics) ColumnHandle(io.prestosql.spi.connector.ColumnHandle) ConnectorTablePartitioning(io.prestosql.spi.connector.ConnectorTablePartitioning) TableType(org.apache.hadoop.hive.metastore.TableType) ConfigurationUtils(io.prestosql.plugin.hive.util.ConfigurationUtils) PrestoPrincipal(io.prestosql.spi.security.PrestoPrincipal) HiveWriterFactory.isSnapshotFile(io.prestosql.plugin.hive.HiveWriterFactory.isSnapshotFile) VarcharType(io.prestosql.spi.type.VarcharType) HiveTableProperties.getLocation(io.prestosql.plugin.hive.HiveTableProperties.getLocation) ConnectorMetadata(io.prestosql.spi.connector.ConnectorMetadata) Statistics(io.prestosql.plugin.hive.util.Statistics) URL(java.net.URL) HdfsContext(io.prestosql.plugin.hive.HdfsEnvironment.HdfsContext) EXTERNAL_TABLE(org.apache.hadoop.hive.metastore.TableType.EXTERNAL_TABLE) ViewNotFoundException(io.prestosql.spi.connector.ViewNotFoundException) HiveTableProperties.getHiveStorageFormat(io.prestosql.plugin.hive.HiveTableProperties.getHiveStorageFormat) Duration(io.airlift.units.Duration) TableStatisticType(io.prestosql.spi.statistics.TableStatisticType) FileSinkOperator(org.apache.hadoop.hive.ql.exec.FileSinkOperator) Preconditions.checkArgument(com.google.common.base.Preconditions.checkArgument) INVALID_ANALYZE_PROPERTY(io.prestosql.spi.StandardErrorCode.INVALID_ANALYZE_PROPERTY) ConnectorSession(io.prestosql.spi.connector.ConnectorSession) Collectors.toMap(java.util.stream.Collectors.toMap) ConnectorTableProperties(io.prestosql.spi.connector.ConnectorTableProperties) Iterables.concat(com.google.common.collect.Iterables.concat) Path(org.apache.hadoop.fs.Path) Type(io.prestosql.spi.type.Type) Splitter(com.google.common.base.Splitter) BIGINT(io.prestosql.spi.type.BigintType.BIGINT) Collectors.toSet(java.util.stream.Collectors.toSet) Constraint(io.prestosql.spi.connector.Constraint) PrestoException(io.prestosql.spi.PrestoException) ImmutableSet(com.google.common.collect.ImmutableSet) HiveWriteUtils.isS3FileSystem(io.prestosql.plugin.hive.HiveWriteUtils.isS3FileSystem) ImmutableMap(com.google.common.collect.ImmutableMap) Predicate(java.util.function.Predicate) HiveUtil.columnExtraInfo(io.prestosql.plugin.hive.HiveUtil.columnExtraInfo) Collections.emptyList(java.util.Collections.emptyList) Collection(java.util.Collection) HiveUtil.encodeViewData(io.prestosql.plugin.hive.HiveUtil.encodeViewData) HiveWriterFactory.isSnapshotSubFile(io.prestosql.plugin.hive.HiveWriterFactory.isSnapshotSubFile) ROW_COUNT(io.prestosql.spi.statistics.TableStatisticType.ROW_COUNT) Collectors(java.util.stream.Collectors) Sets(com.google.common.collect.Sets) String.format(java.lang.String.format) List(java.util.List) PRESTO_VIEW_FLAG(io.prestosql.plugin.hive.HiveUtil.PRESTO_VIEW_FLAG) VarcharType.createUnboundedVarcharType(io.prestosql.spi.type.VarcharType.createUnboundedVarcharType) Function.identity(java.util.function.Function.identity) ConnectorTransactionHandle(io.prestosql.spi.connector.ConnectorTransactionHandle) Optional(java.util.Optional) MoreObjects.firstNonNull(com.google.common.base.MoreObjects.firstNonNull) NOT_SUPPORTED(io.prestosql.spi.StandardErrorCode.NOT_SUPPORTED) AcidUtils(org.apache.hadoop.hive.ql.io.AcidUtils) HiveStatisticsProvider(io.prestosql.plugin.hive.statistics.HiveStatisticsProvider) JsonCodec(io.airlift.json.JsonCodec) IntStream(java.util.stream.IntStream) ConnectorOutputMetadata(io.prestosql.spi.connector.ConnectorOutputMetadata) Logger(io.airlift.log.Logger) ConnectorViewDefinition(io.prestosql.spi.connector.ConnectorViewDefinition) ConnectorNewTableLayout(io.prestosql.spi.connector.ConnectorNewTableLayout) HashMap(java.util.HashMap) HiveUtil.toPartitionValues(io.prestosql.plugin.hive.HiveUtil.toPartitionValues) HivePrincipal(io.prestosql.plugin.hive.metastore.HivePrincipal) OptionalInt(java.util.OptionalInt) Function(java.util.function.Function) ColumnStatisticType(io.prestosql.spi.statistics.ColumnStatisticType) InMemoryRecordSet(io.prestosql.spi.connector.InMemoryRecordSet) HashSet(java.util.HashSet) HiveTableProperties.getExternalLocation(io.prestosql.plugin.hive.HiveTableProperties.getExternalLocation) SchemaTableName(io.prestosql.spi.connector.SchemaTableName) OpenCSVSerde(org.apache.hadoop.hive.serde2.OpenCSVSerde) INVALID_SCHEMA_PROPERTY(io.prestosql.spi.StandardErrorCode.INVALID_SCHEMA_PROPERTY) ImmutableList(com.google.common.collect.ImmutableList) Verify.verify(com.google.common.base.Verify.verify) Objects.requireNonNull(java.util.Objects.requireNonNull) Suppliers(com.google.common.base.Suppliers) NoSuchElementException(java.util.NoSuchElementException) Block(io.prestosql.spi.block.Block) VerifyException(com.google.common.base.VerifyException) Collections.emptyMap(java.util.Collections.emptyMap) HiveIdentity(io.prestosql.plugin.hive.authentication.HiveIdentity) MalformedURLException(java.net.MalformedURLException) ColumnMetadata(io.prestosql.spi.connector.ColumnMetadata) ConnectorTableHandle(io.prestosql.spi.connector.ConnectorTableHandle) TupleDomain(io.prestosql.spi.predicate.TupleDomain) NON_INHERITABLE_PROPERTIES(io.prestosql.plugin.hive.HiveTableProperties.NON_INHERITABLE_PROPERTIES) Maps(com.google.common.collect.Maps) PRIMITIVE(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category.PRIMITIVE) JobConf(org.apache.hadoop.mapred.JobConf) Collectors.toList(java.util.stream.Collectors.toList) Column(io.prestosql.plugin.hive.metastore.Column) GENERIC_USER_ERROR(io.prestosql.spi.StandardErrorCode.GENERIC_USER_ERROR) VisibleForTesting(com.google.common.annotations.VisibleForTesting) Comparator(java.util.Comparator) ConnectorInsertTableHandle(io.prestosql.spi.connector.ConnectorInsertTableHandle) HiveBucketing.bucketedOnTimestamp(io.prestosql.plugin.hive.HiveBucketing.bucketedOnTimestamp) PrestoException(io.prestosql.spi.PrestoException) HiveIdentity(io.prestosql.plugin.hive.authentication.HiveIdentity) HiveTableProperties.getHiveStorageFormat(io.prestosql.plugin.hive.HiveTableProperties.getHiveStorageFormat) SortingColumn(io.prestosql.plugin.hive.metastore.SortingColumn) Column(io.prestosql.plugin.hive.metastore.Column) HdfsContext(io.prestosql.plugin.hive.HdfsEnvironment.HdfsContext) Path(org.apache.hadoop.fs.Path) SchemaTableName(io.prestosql.spi.connector.SchemaTableName) OptionalLong(java.util.OptionalLong)

Aggregations

ImmutableMap (com.google.common.collect.ImmutableMap)6 ImmutableMap.toImmutableMap (com.google.common.collect.ImmutableMap.toImmutableMap)6 HiveTableProperties.getHiveStorageFormat (io.prestosql.plugin.hive.HiveTableProperties.getHiveStorageFormat)6 PrestoException (io.prestosql.spi.PrestoException)6 VisibleForTesting (com.google.common.annotations.VisibleForTesting)4 Joiner (com.google.common.base.Joiner)4 MoreObjects.firstNonNull (com.google.common.base.MoreObjects.firstNonNull)4 Preconditions.checkArgument (com.google.common.base.Preconditions.checkArgument)4 Splitter (com.google.common.base.Splitter)4 Suppliers (com.google.common.base.Suppliers)4 Verify.verify (com.google.common.base.Verify.verify)4 VerifyException (com.google.common.base.VerifyException)4 ImmutableList (com.google.common.collect.ImmutableList)4 ImmutableList.toImmutableList (com.google.common.collect.ImmutableList.toImmutableList)4 ImmutableSet (com.google.common.collect.ImmutableSet)4 ImmutableSet.toImmutableSet (com.google.common.collect.ImmutableSet.toImmutableSet)4 ImmutableSortedMap (com.google.common.collect.ImmutableSortedMap)4 Iterables (com.google.common.collect.Iterables)4 Iterables.concat (com.google.common.collect.Iterables.concat)4 Maps (com.google.common.collect.Maps)4