
Example 31 with Partition

use of io.prestosql.plugin.hive.metastore.Partition in project boostkit-bigdata by kunpengcompute.

the class TestBackgroundHiveSplitLoader method testPartitionedTableWithDynamicFilter.

@Test
public void testPartitionedTableWithDynamicFilter() throws Exception {
    TypeManager typeManager = new TestingTypeManager();
    List<HivePartitionMetadata> hivePartitionMetadatas = ImmutableList.of(new HivePartitionMetadata(
            new HivePartition(new SchemaTableName("testSchema", "table_name")),
            Optional.of(new Partition(
                    "testSchema",
                    "table_name",
                    ImmutableList.of("1"),
                    TABLE_STORAGE,
                    ImmutableList.of(TABLE_COLUMN),
                    ImmutableMap.of("param", "value"))),
            ImmutableMap.of()));
    ConnectorSession connectorSession = new TestingConnectorSession(new HiveSessionProperties(
            new HiveConfig()
                    .setMaxSplitSize(new DataSize(1.0, GIGABYTE))
                    .setDynamicFilterPartitionFilteringEnabled(true),
            new OrcFileWriterConfig(),
            new ParquetFileWriterConfig()).getSessionProperties());
    BackgroundHiveSplitLoader backgroundHiveSplitLoader = new BackgroundHiveSplitLoader(
            PARTITIONED_TABLE,
            hivePartitionMetadatas,
            TupleDomain.all(),
            BackgroundHiveSplitLoader.BucketSplitInfo.createBucketSplitInfo(Optional.empty(), Optional.empty()),
            connectorSession,
            new TestingHdfsEnvironment(TEST_FILES),
            new NamenodeStats(),
            new CachingDirectoryLister(new HiveConfig()),
            directExecutor(),
            2,
            false,
            Optional.empty(),
            createTestDynamicFilterSupplier("partitionColumn", ImmutableList.of(0L, 2L, 3L)),
            Optional.empty(),
            ImmutableMap.of(),
            typeManager);
    HiveSplitSource hiveSplitSource = hiveSplitSource(backgroundHiveSplitLoader);
    backgroundHiveSplitLoader.start(hiveSplitSource);
    List<HiveSplit> splits = drainSplits(hiveSplitSource);
    assertEquals(splits.size(), 0, "Splits should be filtered");
}
Also used : Partition(io.prestosql.plugin.hive.metastore.Partition) TestingConnectorSession(io.prestosql.testing.TestingConnectorSession) SchemaTableName(io.prestosql.spi.connector.SchemaTableName) DataSize(io.airlift.units.DataSize) TestingTypeManager(io.prestosql.spi.type.TestingTypeManager) TypeManager(io.prestosql.spi.type.TypeManager) ConnectorSession(io.prestosql.spi.connector.ConnectorSession) TestingConnectorSession(io.prestosql.testing.TestingConnectorSession) TestingTypeManager(io.prestosql.spi.type.TestingTypeManager) Test(org.testng.annotations.Test)
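
The decisive detail in this test is that the dynamic filter built by createTestDynamicFilterSupplier allows only the values 0, 2 and 3 for "partitionColumn", while the single partition carries the value "1", so BackgroundHiveSplitLoader prunes it and drainSplits returns an empty list. Below is a minimal, self-contained sketch of that pruning decision; PartitionPruningSketch and matchesDynamicFilter are hypothetical names, and the real check is HiveUtil.isPartitionFiltered working on HivePartitionKey and DynamicFilter objects.

import java.util.Set;

// Hypothetical stand-in for the pruning check performed before listing a partition's files.
public class PartitionPruningSketch {
    static boolean matchesDynamicFilter(String partitionValue, Set<Long> allowedValues) {
        try {
            // a partition survives only if its key value is in the dynamic filter's allowed set
            return allowedValues.contains(Long.parseLong(partitionValue));
        }
        catch (NumberFormatException e) {
            return true; // unparsable values are conservatively kept
        }
    }

    public static void main(String[] args) {
        Set<Long> allowed = Set.of(0L, 2L, 3L);
        // partition value "1" from the test is not in {0, 2, 3}, so the partition is filtered
        // out and no splits are generated for it
        System.out.println(matchesDynamicFilter("1", allowed)); // false
    }
}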

Example 32 with Partition

use of io.prestosql.plugin.hive.metastore.Partition in project hetu-core by openlookeng.

the class CarbondataMetadata method finishInsertInNewPartition.

@Override
protected void finishInsertInNewPartition(ConnectorSession session, HiveInsertTableHandle handle, Table table, Map<String, Type> columnTypes, PartitionUpdate partitionUpdate, Map<List<String>, ComputedStatistics> partitionComputedStatistics, HiveACIDWriteType acidWriteType) {
    // insert into new partition or overwrite existing partition
    if (partitionUpdate.getUpdateMode() == PartitionUpdate.UpdateMode.OVERWRITE) {
        List<String> partitionValues = toPartitionValues(partitionUpdate.getName());
        PartitionStatistics partitionStatistics = createPartitionStatistics(session, partitionUpdate.getStatistics(), columnTypes, getColumnStatistics(partitionComputedStatistics, partitionValues));
        metastore.finishInsertIntoExistingPartition(session, handle.getSchemaName(), handle.getTableName(), partitionValues, partitionUpdate.getWritePath(), partitionUpdate.getFileNames(), partitionStatistics, acidWriteType);
    } else if (partitionUpdate.getUpdateMode() == PartitionUpdate.UpdateMode.APPEND) {
        Partition partition = buildPartitionObject(session, table, partitionUpdate);
        if (!partition.getStorage().getStorageFormat().getInputFormat().equals(handle.getPartitionStorageFormat().getInputFormat()) && HiveSessionProperties.isRespectTableFormat(session)) {
            throw new PrestoException(HiveErrorCode.HIVE_CONCURRENT_MODIFICATION_DETECTED, "Partition format changed during insert");
        }
        PartitionStatistics partitionStatistics = createPartitionStatistics(session, partitionUpdate.getStatistics(), columnTypes, getColumnStatistics(partitionComputedStatistics, partition.getValues()));
        metastore.addPartition(session, handle.getSchemaName(), handle.getTableName(), partition, partitionUpdate.getWritePath(), partitionStatistics, acidWriteType);
    }
}
Also used : Partition(io.prestosql.plugin.hive.metastore.Partition) PartitionStatistics(io.prestosql.plugin.hive.PartitionStatistics) PrestoException(io.prestosql.spi.PrestoException)
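
Both branches above hinge on toPartitionValues, which turns the partition name carried by the PartitionUpdate into the ordered list of partition values handed to the metastore. The sketch below shows the idea on plain strings; it is a simplification, since the real HiveUtil.toPartitionValues also unescapes values encoded by the metastore.

import java.util.ArrayList;
import java.util.List;

// Simplified parsing of a Hive partition name such as "ds=2020-01-01/country=US".
public class PartitionNameSketch {
    static List<String> toPartitionValues(String partitionName) {
        List<String> values = new ArrayList<>();
        for (String component : partitionName.split("/")) {
            int eq = component.indexOf('=');
            // keep everything after the first '=' as the partition value
            values.add(eq < 0 ? component : component.substring(eq + 1));
        }
        return values;
    }

    public static void main(String[] args) {
        System.out.println(toPartitionValues("ds=2020-01-01/country=US")); // [2020-01-01, US]
    }
}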

Example 33 with Partition

use of io.prestosql.plugin.hive.metastore.Partition in project hetu-core by openlookeng.

the class CarbondataMetadata method updateSchemaInfoDropColumn.

private SchemaEvolutionEntry updateSchemaInfoDropColumn(ColumnHandle column) {
    HiveColumnHandle columnHandle = (HiveColumnHandle) column;
    TableSchema tableSchema = tableInfo.getFactTable();
    List<ColumnSchema> tableColumns = tableSchema.getListOfColumns();
    int currentSchemaOrdinal = tableColumns.stream().max(Comparator.comparing(ColumnSchema::getSchemaOrdinal)).orElseThrow(NoSuchElementException::new).getSchemaOrdinal() + 1;
    TableSchemaBuilder schemaBuilder = new TableSchemaBuilder();
    List<ColumnSchema> columnSchemas = new ArrayList<ColumnSchema>();
    ColumnSchema newColumn = schemaBuilder.addColumn(new StructField(columnHandle.getColumnName(), CarbondataHetuFilterUtil.spi2CarbondataTypeMapper(columnHandle)), null, false, false);
    newColumn.setSchemaOrdinal(currentSchemaOrdinal);
    columnSchemas.add(newColumn);
    PartitionInfo partitionInfo = tableInfo.getFactTable().getPartitionInfo();
    if (partitionInfo != null) {
        List<String> partitionColumnSchemaList = tableInfo.getFactTable().getPartitionInfo().getColumnSchemaList().stream().map(cols -> cols.getColumnName()).collect(toList());
        if (partitionColumnSchemaList.stream().anyMatch(partitionColumn -> partitionColumn.equals(newColumn.getColumnName()))) {
            throw new PrestoException(GENERIC_INTERNAL_ERROR, "Partition columns cannot be dropped");
        }
        // dropping the last non-partition column would leave only partition columns, which is not allowed
        if (tableColumns.stream().filter(cols -> !cols.getColumnName().equals(newColumn.getColumnName())).map(cols -> cols.getColumnName()).equals(partitionColumnSchemaList)) {
            throw new PrestoException(GENERIC_INTERNAL_ERROR, "Cannot have table with all columns as partition columns");
        }
    }
    if (!tableColumns.stream().filter(cols -> cols.getColumnName().equals(newColumn.getColumnName())).collect(toList()).isEmpty()) {
        if (newColumn.isComplexColumn()) {
            throw new PrestoException(GENERIC_INTERNAL_ERROR, "Complex column cannot be dropped");
        }
    } else {
        throw new PrestoException(GENERIC_INTERNAL_ERROR, "Cannot have table with all columns as partition columns");
    }
    tableInfo.setLastUpdatedTime(System.currentTimeMillis());
    tableInfo.setFactTable(tableSchema);
    SchemaEvolutionEntry schemaEvolutionEntry = new SchemaEvolutionEntry();
    schemaEvolutionEntry.setTimeStamp(timeStamp);
    schemaEvolutionEntry.setRemoved(columnSchemas);
    return schemaEvolutionEntry;
}
Also used : Arrays(java.util.Arrays) StorageFormat(io.prestosql.plugin.hive.metastore.StorageFormat) BaseStorageFormat(io.prestosql.plugin.hive.BaseStorageFormat) HiveTableHandle(io.prestosql.plugin.hive.HiveTableHandle) FileSystem(org.apache.hadoop.fs.FileSystem) HiveWriteUtils(io.prestosql.plugin.hive.HiveWriteUtils) HiveUtil.hiveColumnHandles(io.prestosql.plugin.hive.HiveUtil.hiveColumnHandles) MetastoreUtil(io.prestosql.plugin.hive.metastore.MetastoreUtil) TableAlreadyExistsException(io.prestosql.spi.connector.TableAlreadyExistsException) ConnectorVacuumTableHandle(io.prestosql.spi.connector.ConnectorVacuumTableHandle) StringUtils(org.apache.commons.lang3.StringUtils) CarbonLockFactory(org.apache.carbondata.core.locks.CarbonLockFactory) HiveUtil.getPartitionKeyColumnHandles(io.prestosql.plugin.hive.HiveUtil.getPartitionKeyColumnHandles) ConnectorDeleteAsInsertTableHandle(io.prestosql.spi.connector.ConnectorDeleteAsInsertTableHandle) CarbonCommonConstants(org.apache.carbondata.core.constants.CarbonCommonConstants) Future(java.util.concurrent.Future) TableNotFoundException(io.prestosql.spi.connector.TableNotFoundException) ConnectorUpdateTableHandle(io.prestosql.spi.connector.ConnectorUpdateTableHandle) Configuration(org.apache.hadoop.conf.Configuration) Map(java.util.Map) HIVE_STRING(io.prestosql.plugin.hive.HiveType.HIVE_STRING) StringEscapeUtils(org.apache.commons.lang3.StringEscapeUtils) HiveErrorCode(io.prestosql.plugin.hive.HiveErrorCode) ThriftWrapperSchemaConverterImpl(org.apache.carbondata.core.metadata.converter.ThriftWrapperSchemaConverterImpl) TaskAttemptID(org.apache.hadoop.mapreduce.TaskAttemptID) Set(java.util.Set) LOCATION_PROPERTY(io.prestosql.plugin.hive.HiveTableProperties.LOCATION_PROPERTY) HiveTableProperties.getTransactionalValue(io.prestosql.plugin.hive.HiveTableProperties.getTransactionalValue) HiveOutputTableHandle(io.prestosql.plugin.hive.HiveOutputTableHandle) Collectors.joining(java.util.stream.Collectors.joining) BlockMappingVO(org.apache.carbondata.core.mutate.data.BlockMappingVO) CarbonLoadModel(org.apache.carbondata.processing.loading.model.CarbonLoadModel) META_TABLE_NAME(org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.META_TABLE_NAME) Table(io.prestosql.plugin.hive.metastore.Table) GENERIC_INTERNAL_ERROR(io.prestosql.spi.StandardErrorCode.GENERIC_INTERNAL_ERROR) AccessControlMetadata(io.prestosql.plugin.hive.security.AccessControlMetadata) TableOptionConstant(org.apache.carbondata.processing.util.TableOptionConstant) SortingColumn(io.prestosql.plugin.hive.metastore.SortingColumn) PartitionInfo(org.apache.carbondata.core.metadata.schema.PartitionInfo) TypeTranslator(io.prestosql.plugin.hive.TypeTranslator) ConnectorVacuumTableInfo(io.prestosql.spi.connector.ConnectorVacuumTableInfo) MapredCarbonOutputFormat(org.apache.carbondata.hive.MapredCarbonOutputFormat) StructField(org.apache.carbondata.core.metadata.datatype.StructField) CarbonUtil(org.apache.carbondata.core.util.CarbonUtil) CarbondataTableProperties.getCarbondataLocation(io.hetu.core.plugin.carbondata.CarbondataTableProperties.getCarbondataLocation) HiveWriterFactory(io.prestosql.plugin.hive.HiveWriterFactory) Database(io.prestosql.plugin.hive.metastore.Database) SchemaEvolutionEntry(org.apache.carbondata.core.metadata.schema.SchemaEvolutionEntry) Slice(io.airlift.slice.Slice) Partition(io.prestosql.plugin.hive.metastore.Partition) TRANSACTIONAL(io.prestosql.plugin.hive.HiveTableProperties.TRANSACTIONAL) DataTypes(org.apache.carbondata.core.metadata.datatype.DataTypes) 
CarbonDataMergerUtil(org.apache.carbondata.processing.merger.CarbonDataMergerUtil) SimpleDateFormat(java.text.SimpleDateFormat) ComputedStatistics(io.prestosql.spi.statistics.ComputedStatistics) CarbondataTableReader(io.hetu.core.plugin.carbondata.impl.CarbondataTableReader) ArrayList(java.util.ArrayList) HdfsEnvironment(io.prestosql.plugin.hive.HdfsEnvironment) ThreadLocalSessionInfo(org.apache.carbondata.core.util.ThreadLocalSessionInfo) ScheduledExecutorService(java.util.concurrent.ScheduledExecutorService) LocationService(io.prestosql.plugin.hive.LocationService) LockUsage(org.apache.carbondata.core.locks.LockUsage) CarbonUpdateUtil(org.apache.carbondata.core.mutate.CarbonUpdateUtil) SemiTransactionalHiveMetastore(io.prestosql.plugin.hive.metastore.SemiTransactionalHiveMetastore) SegmentStatusManager(org.apache.carbondata.core.statusmanager.SegmentStatusManager) ConnectorOutputTableHandle(io.prestosql.spi.connector.ConnectorOutputTableHandle) Properties(java.util.Properties) PartitionStatistics(io.prestosql.plugin.hive.PartitionStatistics) CarbonFile(org.apache.carbondata.core.datastore.filesystem.CarbonFile) CarbonOutputCommitter(org.apache.carbondata.hadoop.api.CarbonOutputCommitter) HiveStorageFormat(io.prestosql.plugin.hive.HiveStorageFormat) CarbonTablePath(org.apache.carbondata.core.util.path.CarbonTablePath) HiveInsertTableHandle(io.prestosql.plugin.hive.HiveInsertTableHandle) HiveTableProperties(io.prestosql.plugin.hive.HiveTableProperties) TypeManager(io.prestosql.spi.type.TypeManager) ICarbonLock(org.apache.carbondata.core.locks.ICarbonLock) IOException(java.io.IOException) CarbonTableIdentifier(org.apache.carbondata.core.metadata.CarbonTableIdentifier) PrincipalPrivileges(io.prestosql.plugin.hive.metastore.PrincipalPrivileges) ConnectorTableMetadata(io.prestosql.spi.connector.ConnectorTableMetadata) File(java.io.File) ColumnSchema(org.apache.carbondata.core.metadata.schema.table.column.ColumnSchema) ExecutionException(java.util.concurrent.ExecutionException) OutputCommitter(org.apache.hadoop.mapreduce.OutputCommitter) TreeMap(java.util.TreeMap) ColumnHandle(io.prestosql.spi.connector.ColumnHandle) AbsoluteTableIdentifier(org.apache.carbondata.core.metadata.AbsoluteTableIdentifier) HiveWrittenPartitions(io.prestosql.plugin.hive.HiveWrittenPartitions) TableType(org.apache.hadoop.hive.metastore.TableType) META_TABLE_LOCATION(org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.META_TABLE_LOCATION) ConfigurationUtils(io.prestosql.plugin.hive.util.ConfigurationUtils) LocationHandle(io.prestosql.plugin.hive.LocationHandle) CarbonTableOutputFormat(org.apache.carbondata.hadoop.api.CarbonTableOutputFormat) CarbonMetadata(org.apache.carbondata.core.metadata.CarbonMetadata) HiveBasicStatistics(io.prestosql.plugin.hive.HiveBasicStatistics) HivePartitionManager(io.prestosql.plugin.hive.HivePartitionManager) ThriftWriter(org.apache.carbondata.core.writer.ThriftWriter) HiveTypeName(io.prestosql.plugin.hive.HiveTypeName) Date(java.util.Date) SYNTHESIZED(io.prestosql.plugin.hive.HiveColumnHandle.ColumnType.SYNTHESIZED) HiveColumnHandle(io.prestosql.plugin.hive.HiveColumnHandle) Duration(io.airlift.units.Duration) SegmentFileStore(org.apache.carbondata.core.metadata.SegmentFileStore) TaskType(org.apache.hadoop.mapreduce.TaskType) Logger(org.apache.log4j.Logger) ConnectorSession(io.prestosql.spi.connector.ConnectorSession) Gson(com.google.gson.Gson) Locale(java.util.Locale) HiveCarbonUtil(org.apache.carbondata.hive.util.HiveCarbonUtil) Path(org.apache.hadoop.fs.Path) 
SegmentUpdateStatusManager(org.apache.carbondata.core.statusmanager.SegmentUpdateStatusManager) Type(io.prestosql.spi.type.Type) CarbonTable(org.apache.carbondata.core.metadata.schema.table.CarbonTable) EncodedLoadModel(io.hetu.core.plugin.carbondata.CarbondataConstants.EncodedLoadModel) HiveBucketing(io.prestosql.plugin.hive.HiveBucketing) PrestoException(io.prestosql.spi.PrestoException) ImmutableSet(com.google.common.collect.ImmutableSet) ImmutableMap(com.google.common.collect.ImmutableMap) FileInputFormat(org.apache.hadoop.mapred.FileInputFormat) Collection(java.util.Collection) UUID(java.util.UUID) TableSchema(org.apache.carbondata.core.metadata.schema.table.TableSchema) HiveType(io.prestosql.plugin.hive.HiveType) CarbonLockUtil(org.apache.carbondata.core.locks.CarbonLockUtil) Collectors(java.util.stream.Collectors) String.format(java.lang.String.format) CarbonTableInputFormat(org.apache.carbondata.hadoop.api.CarbonTableInputFormat) List(java.util.List) TaskAttemptContextImpl(org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl) Job(org.apache.hadoop.mapreduce.Job) TableSchemaBuilder(org.apache.carbondata.core.metadata.schema.table.TableSchemaBuilder) Optional(java.util.Optional) NOT_SUPPORTED(io.prestosql.spi.StandardErrorCode.NOT_SUPPORTED) HiveStatisticsProvider(io.prestosql.plugin.hive.statistics.HiveStatisticsProvider) JsonCodec(io.airlift.json.JsonCodec) HiveBucketProperty(io.prestosql.plugin.hive.HiveBucketProperty) ConnectorOutputMetadata(io.prestosql.spi.connector.ConnectorOutputMetadata) Segment(org.apache.carbondata.core.index.Segment) HiveSessionProperties(io.prestosql.plugin.hive.HiveSessionProperties) ConnectorNewTableLayout(io.prestosql.spi.connector.ConnectorNewTableLayout) TableInfo(org.apache.carbondata.core.metadata.schema.table.TableInfo) HashMap(java.util.HashMap) TableOperation(org.apache.carbondata.core.features.TableOperation) CompactionType(org.apache.carbondata.processing.merger.CompactionType) IOConstants(org.apache.hadoop.hive.ql.io.IOConstants) HiveUtil.toPartitionValues(io.prestosql.plugin.hive.HiveUtil.toPartitionValues) FileFactory(org.apache.carbondata.core.datastore.impl.FileFactory) SegmentStatus(org.apache.carbondata.core.statusmanager.SegmentStatus) LoadMetadataDetails(org.apache.carbondata.core.statusmanager.LoadMetadataDetails) Function(java.util.function.Function) ObjectSerializationUtil(org.apache.carbondata.core.util.ObjectSerializationUtil) JobContextImpl(org.apache.hadoop.mapreduce.task.JobContextImpl) HashSet(java.util.HashSet) JobStatus(org.apache.hadoop.mapred.JobStatus) SchemaTableName(io.prestosql.spi.connector.SchemaTableName) ImmutableList(com.google.common.collect.ImmutableList) FileWriteOperation(org.apache.carbondata.core.fileoperations.FileWriteOperation) Objects.requireNonNull(java.util.Objects.requireNonNull) CarbonLoaderUtil(org.apache.carbondata.processing.util.CarbonLoaderUtil) HiveACIDWriteType(io.prestosql.plugin.hive.HiveACIDWriteType) HiveMetadata(io.prestosql.plugin.hive.HiveMetadata) LogServiceFactory(org.apache.carbondata.common.logging.LogServiceFactory) JobID(org.apache.hadoop.mapreduce.JobID) NoSuchElementException(java.util.NoSuchElementException) SegmentUpdateDetails(org.apache.carbondata.core.mutate.SegmentUpdateDetails) HiveIdentity(io.prestosql.plugin.hive.authentication.HiveIdentity) HiveUpdateTableHandle(io.prestosql.plugin.hive.HiveUpdateTableHandle) TableProcessingOperations(org.apache.carbondata.processing.loading.TableProcessingOperations) ColumnMetadata(io.prestosql.spi.connector.ColumnMetadata) 
ConnectorTableHandle(io.prestosql.spi.connector.ConnectorTableHandle) NON_INHERITABLE_PROPERTIES(io.prestosql.plugin.hive.HiveTableProperties.NON_INHERITABLE_PROPERTIES) SchemaNotFoundException(io.prestosql.spi.connector.SchemaNotFoundException) NoSuchMVException(org.apache.carbondata.common.exceptions.sql.NoSuchMVException) Maps(com.google.common.collect.Maps) HiveDeleteAsInsertTableHandle(io.prestosql.plugin.hive.HiveDeleteAsInsertTableHandle) RowCountDetailsVO(org.apache.carbondata.core.mutate.data.RowCountDetailsVO) CarbondataTableCacheModel(io.hetu.core.plugin.carbondata.impl.CarbondataTableCacheModel) PartitionUpdate(io.prestosql.plugin.hive.PartitionUpdate) JobConf(org.apache.hadoop.mapred.JobConf) TimeUnit(java.util.concurrent.TimeUnit) Collectors.toList(java.util.stream.Collectors.toList) ConcurrentSkipListSet(java.util.concurrent.ConcurrentSkipListSet) Column(io.prestosql.plugin.hive.metastore.Column) VisibleForTesting(com.google.common.annotations.VisibleForTesting) Comparator(java.util.Comparator) ConnectorInsertTableHandle(io.prestosql.spi.connector.ConnectorInsertTableHandle) SchemaConverter(org.apache.carbondata.core.metadata.converter.SchemaConverter) TableSchema(org.apache.carbondata.core.metadata.schema.table.TableSchema) ArrayList(java.util.ArrayList) TableSchemaBuilder(org.apache.carbondata.core.metadata.schema.table.TableSchemaBuilder) ColumnSchema(org.apache.carbondata.core.metadata.schema.table.column.ColumnSchema) PrestoException(io.prestosql.spi.PrestoException) SchemaEvolutionEntry(org.apache.carbondata.core.metadata.schema.SchemaEvolutionEntry) StructField(org.apache.carbondata.core.metadata.datatype.StructField) PartitionInfo(org.apache.carbondata.core.metadata.schema.PartitionInfo) HiveColumnHandle(io.prestosql.plugin.hive.HiveColumnHandle) NoSuchElementException(java.util.NoSuchElementException)
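
The two guards in updateSchemaInfoDropColumn reduce to simple checks over column names: the dropped column must not be a partition column, it must exist in the table, and dropping it must not leave only partition columns behind. A minimal sketch of those checks on plain strings follows; DropColumnGuardSketch and validateDrop are hypothetical names, and the real code operates on CarbonData ColumnSchema objects and throws PrestoException with GENERIC_INTERNAL_ERROR.

import java.util.List;
import java.util.Set;

// Sketch of the drop-column validation, reduced to column names.
public class DropColumnGuardSketch {
    static void validateDrop(String columnToDrop, List<String> tableColumns, Set<String> partitionColumns) {
        if (partitionColumns.contains(columnToDrop)) {
            throw new IllegalArgumentException("Partition columns cannot be dropped");
        }
        if (!tableColumns.contains(columnToDrop)) {
            throw new IllegalArgumentException("Column to drop does not exist: " + columnToDrop);
        }
        // dropping the last non-partition column would leave a table consisting only of partition columns
        long remainingNonPartitionColumns = tableColumns.stream()
                .filter(name -> !name.equals(columnToDrop))
                .filter(name -> !partitionColumns.contains(name))
                .count();
        if (remainingNonPartitionColumns == 0) {
            throw new IllegalArgumentException("Cannot have table with all columns as partition columns");
        }
    }

    public static void main(String[] args) {
        validateDrop("city", List.of("id", "city", "ds"), Set.of("ds")); // passes: "id" remains
    }
}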

Example 34 with Partition

use of io.prestosql.plugin.hive.metastore.Partition in project hetu-core by openlookeng.

the class BackgroundHiveSplitLoader method loadPartition.

private ListenableFuture<?> loadPartition(HivePartitionMetadata partition) throws IOException {
    HivePartition hivePartition = partition.getHivePartition();
    String partitionName = hivePartition.getPartitionId();
    Properties schema = getPartitionSchema(table, partition.getPartition());
    List<HivePartitionKey> partitionKeys = getPartitionKeys(table, partition.getPartition());
    TupleDomain<HiveColumnHandle> effectivePredicate = (TupleDomain<HiveColumnHandle>) compactEffectivePredicate;
    if (dynamicFilterSupplier != null && isDynamicFilteringSplitFilteringEnabled(session)) {
        if (isPartitionFiltered(partitionKeys, dynamicFilterSupplier.get(), typeManager)) {
            // Avoid listing files and creating splits from a partition if it has been pruned due to dynamic filters
            return COMPLETED_FUTURE;
        }
    }
    Path path = new Path(getPartitionLocation(table, partition.getPartition()));
    InputFormat<?, ?> inputFormat = getInputFormat(configuration, schema, false, jobConf);
    FileSystem fs = hdfsEnvironment.getFileSystem(hdfsContext, path);
    boolean s3SelectPushdownEnabled = shouldEnablePushdownForTable(session, table, path.toString(), partition.getPartition());
    if (inputFormat instanceof SymlinkTextInputFormat) {
        if (tableBucketInfo.isPresent()) {
            throw new PrestoException(NOT_SUPPORTED, "Bucketed table in SymlinkTextInputFormat is not yet supported");
        }
        // TODO: This should use an iterator like the HiveFileIterator
        ListenableFuture<?> lastResult = COMPLETED_FUTURE;
        for (Path targetPath : getTargetPathsFromSymlink(fs, path)) {
            // The input should be in TextInputFormat.
            TextInputFormat targetInputFormat = new TextInputFormat();
            // the splits must be generated using the file system for the target path
            // get the configuration for the target path -- it may be a different hdfs instance
            FileSystem targetFilesystem = hdfsEnvironment.getFileSystem(hdfsContext, targetPath);
            jobConf.setInputFormat(TextInputFormat.class);
            targetInputFormat.configure(jobConf);
            FileInputFormat.setInputPaths(jobConf, targetPath);
            InputSplit[] targetSplits = targetInputFormat.getSplits(jobConf, 0);
            InternalHiveSplitFactory splitFactory = new InternalHiveSplitFactory(targetFilesystem, partitionName, inputFormat, schema, partitionKeys, effectivePredicate, partition.getColumnCoercions(), Optional.empty(), isForceLocalScheduling(session), s3SelectPushdownEnabled);
            lastResult = addSplitsToSource(targetSplits, splitFactory);
            if (stopped) {
                return COMPLETED_FUTURE;
            }
        }
        return lastResult;
    }
    Optional<BucketConversion> bucketConversion = Optional.empty();
    boolean bucketConversionRequiresWorkerParticipation = false;
    if (partition.getPartition().isPresent()) {
        Optional<HiveBucketProperty> partitionBucketProperty = partition.getPartition().get().getStorage().getBucketProperty();
        if (tableBucketInfo.isPresent() && partitionBucketProperty.isPresent()) {
            int readBucketCount = tableBucketInfo.get().getReadBucketCount();
            // TODO can partition's bucketing_version be different from table's?
            BucketingVersion bucketingVersion = partitionBucketProperty.get().getBucketingVersion();
            int partitionBucketCount = partitionBucketProperty.get().getBucketCount();
            // Here, it's just trying to see if it needs the BucketConversion.
            if (readBucketCount != partitionBucketCount) {
                bucketConversion = Optional.of(new BucketConversion(bucketingVersion, readBucketCount, partitionBucketCount, tableBucketInfo.get().getBucketColumns()));
                if (readBucketCount > partitionBucketCount) {
                    bucketConversionRequiresWorkerParticipation = true;
                }
            }
        }
    }
    InternalHiveSplitFactory splitFactory = new InternalHiveSplitFactory(fs, partitionName, inputFormat, schema, partitionKeys, effectivePredicate, partition.getColumnCoercions(), bucketConversionRequiresWorkerParticipation ? bucketConversion : Optional.empty(), isForceLocalScheduling(session), s3SelectPushdownEnabled);
    // To support custom input formats, we want to call getSplits() on the input format to obtain file splits.
    if (!isHudiParquetInputFormat(inputFormat) && shouldUseFileSplitsFromInputFormat(inputFormat)) {
        if (tableBucketInfo.isPresent()) {
            throw new PrestoException(NOT_SUPPORTED, "Presto cannot read bucketed partition in an input format with UseFileSplitsFromInputFormat annotation: " + inputFormat.getClass().getSimpleName());
        }
        if (AcidUtils.isTransactionalTable(table.getParameters())) {
            throw new PrestoException(NOT_SUPPORTED, "Hive transactional tables in an input format with UseFileSplitsFromInputFormat annotation are not supported: " + inputFormat.getClass().getSimpleName());
        }
        FileInputFormat.setInputPaths(jobConf, path);
        InputSplit[] splits = inputFormat.getSplits(jobConf, 0);
        return addSplitsToSource(splits, splitFactory);
    }
    PathFilter pathFilter = isHudiParquetInputFormat(inputFormat) ? hoodiePathFilterSupplier.get() : path1 -> true;
    // S3 Select pushdown works at the granularity of individual S3 objects,
    // therefore we must not split files when it is enabled.
    boolean splittable = getHeaderCount(schema) == 0 && getFooterCount(schema) == 0 && !s3SelectPushdownEnabled;
    List<Path> readPaths;
    Optional<DeleteDeltaLocations> deleteDeltaLocations;
    long min = Long.MAX_VALUE;
    long max = Long.MIN_VALUE;
    if (AcidUtils.isTransactionalTable(table.getParameters())) {
        boolean isVacuum = queryType.map(type -> type == QueryType.VACUUM).orElse(false);
        AcidUtils.Directory directory = hdfsEnvironment.doAs(hdfsContext.getIdentity().getUser(), () -> {
            ValidWriteIdList writeIdList = validWriteIds.orElseThrow(() -> new IllegalStateException("No validWriteIds present"));
            if (isVacuum) {
                writeIdList = new ValidCompactorWriteIdList(writeIdList.writeToString()) {

                    @Override
                    public RangeResponse isWriteIdRangeValid(long minWriteId, long maxWriteId) {
                        // For unknown reasons, ValidCompactorWriteIdList#isWriteIdRangeValid() does not
                        // check for aborted transactions, and AcidUtils.getAcidState() adds aborted transactions to both the aborted and working lists.
                        // Avoid this by overriding.
                        RangeResponse writeIdRangeValid = super.isWriteIdRangeValid(minWriteId, maxWriteId);
                        if (writeIdRangeValid == RangeResponse.NONE) {
                            return RangeResponse.NONE;
                        } else if (super.isWriteIdRangeAborted(minWriteId, maxWriteId) == RangeResponse.ALL) {
                            return RangeResponse.NONE;
                        }
                        return writeIdRangeValid;
                    }
                };
            }
            return AcidUtils.getAcidState(path, configuration, writeIdList, Ref.from(false), true, table.getParameters());
        });
        if (AcidUtils.isFullAcidTable(table.getParameters())) {
            // From Hive version >= 3.0, delta/base files will always have file '_orc_acid_version' with value >= '2'.
            Path baseOrDeltaPath = directory.getBaseDirectory() != null ? directory.getBaseDirectory() : (directory.getCurrentDirectories().size() > 0 ? directory.getCurrentDirectories().get(0).getPath() : null);
            if (baseOrDeltaPath != null && AcidUtils.OrcAcidVersion.getAcidVersionFromMetaFile(baseOrDeltaPath, fs) < 2) {
                throw new PrestoException(NOT_SUPPORTED, "Hive transactional tables are supported with Hive 3.0 and only after a major compaction has been run");
            }
        }
        readPaths = new ArrayList<>();
        boolean isFullVacuum = isVacuum ? Boolean.valueOf(queryInfo.get("FULL").toString()) : false;
        if (isFullVacuum) {
            // Base will contain everything
            min = 0;
        }
        // During a vacuum, include the base directory only for a full vacuum.
        if (directory.getBaseDirectory() != null && (!isVacuum || isFullVacuum)) {
            readPaths.add(directory.getBaseDirectory());
            if (isVacuum) {
                min = 0;
                max = AcidUtils.parseBase(directory.getBaseDirectory());
            }
        }
        // delta directories
        for (AcidUtils.ParsedDelta delta : directory.getCurrentDirectories()) {
            if (!delta.isDeleteDelta()) {
                readPaths.add(delta.getPath());
            }
            // In case of minor compaction, all delete_delta files should be compacted separately
            else if (isVacuum && !isFullVacuum) {
                readPaths.add(delta.getPath());
            }
            if (isVacuum) {
                min = Math.min(delta.getMinWriteId(), min);
                max = Math.max(delta.getMaxWriteId(), max);
            }
        }
        // Create a registry of delete_delta directories for the partition
        DeleteDeltaLocations.Builder deleteDeltaLocationsBuilder = DeleteDeltaLocations.builder(path);
        for (AcidUtils.ParsedDelta delta : directory.getCurrentDirectories()) {
            // In case of minor compaction, delete_delta directories should not be used for masking.
            if (delta.isDeleteDelta() && (!isVacuum || isFullVacuum)) {
                // For unknown reasons, ParsedDelta.getStatementId() returns 0 even though the parsed statement id is -1;
                // this causes problems when locating the delete_delta directory, so the statement id is parsed again here.
                OptionalInt statementId = getStatementId(delta.getPath().getName());
                int stmtId = statementId.orElse(0);
                deleteDeltaLocationsBuilder.addDeleteDelta(delta.getPath(), delta.getMinWriteId(), delta.getMaxWriteId(), stmtId);
            }
        }
        deleteDeltaLocations = deleteDeltaLocationsBuilder.build();
        if (!directory.getOriginalFiles().isEmpty()) {
            LOG.info("Now supporting read from non-ACID files in ACID reader");
            // non-ACID file
            int numberOfBuckets = Integer.parseInt(schema.getProperty("bucket_count"));
            long[] bucketStartRowOffset = new long[Integer.max(numberOfBuckets, 1)];
            for (HadoopShims.HdfsFileStatusWithId f : directory.getOriginalFiles()) {
                Path currFilePath = f.getFileStatus().getPath();
                int currBucketNumber = getBucketNumber(currFilePath.getName()).getAsInt();
                fileIterators.addLast(createInternalHiveSplitIterator(currFilePath, fs, splitFactory, splittable, deleteDeltaLocations, Optional.of(bucketStartRowOffset[currBucketNumber]), pathFilter));
                try {
                    Reader copyReader = OrcFile.createReader(f.getFileStatus().getPath(), OrcFile.readerOptions(configuration));
                    bucketStartRowOffset[currBucketNumber] += copyReader.getNumberOfRows();
                } catch (Exception e) {
                    throw new PrestoException(NOT_SUPPORTED, e.getMessage());
                }
            }
        }
        if (isVacuum && !readPaths.isEmpty()) {
            Object vacuumHandle = queryInfo.get("vacuumHandle");
            if (vacuumHandle != null && vacuumHandle instanceof HiveVacuumTableHandle) {
                HiveVacuumTableHandle hiveVacuumTableHandle = (HiveVacuumTableHandle) vacuumHandle;
                hiveVacuumTableHandle.addRange(partitionName, new Range(min, max));
            }
        }
    } else {
        readPaths = ImmutableList.of(path);
        deleteDeltaLocations = Optional.empty();
    }
    // Bucketed partitions are fully loaded immediately since all files must be loaded to determine the file to bucket mapping
    if (tableBucketInfo.isPresent()) {
        // TODO document in addToQueue() that it is sufficient to hold on to last returned future
        ListenableFuture<?> lastResult = immediateFuture(null);
        for (Path readPath : readPaths) {
            lastResult = hiveSplitSource.addToQueue(getBucketedSplits(readPath, fs, splitFactory, tableBucketInfo.get(), bucketConversion, getDeleteDeltaLocationFor(readPath, deleteDeltaLocations), pathFilter));
        }
        return lastResult;
    }
    for (Path readPath : readPaths) {
        fileIterators.addLast(createInternalHiveSplitIterator(readPath, fs, splitFactory, splittable, getDeleteDeltaLocationFor(readPath, deleteDeltaLocations), Optional.empty(), pathFilter));
    }
    return COMPLETED_FUTURE;
}
Also used : ArrayListMultimap(com.google.common.collect.ArrayListMultimap) Arrays(java.util.Arrays) ListMultimap(com.google.common.collect.ListMultimap) FileSystem(org.apache.hadoop.fs.FileSystem) Range(io.prestosql.plugin.hive.HiveVacuumTableHandle.Range) FileStatus(org.apache.hadoop.fs.FileStatus) FileSplit(org.apache.hadoop.mapred.FileSplit) Matcher(java.util.regex.Matcher) BucketingVersion(io.prestosql.plugin.hive.HiveBucketing.BucketingVersion) Configuration(org.apache.hadoop.conf.Configuration) Map(java.util.Map) HiveUtil.getFooterCount(io.prestosql.plugin.hive.HiveUtil.getFooterCount) HiveSessionProperties.isForceLocalScheduling(io.prestosql.plugin.hive.HiveSessionProperties.isForceLocalScheduling) InternalHiveSplitFactory(io.prestosql.plugin.hive.util.InternalHiveSplitFactory) HadoopShims(org.apache.hadoop.hive.shims.HadoopShims) HiveUtil.getHeaderCount(io.prestosql.plugin.hive.HiveUtil.getHeaderCount) Set(java.util.Set) StandardCharsets(java.nio.charset.StandardCharsets) FAIL(io.prestosql.plugin.hive.util.HiveFileIterator.NestedDirectoryPolicy.FAIL) Table(io.prestosql.plugin.hive.metastore.Table) ResumableTask(io.prestosql.plugin.hive.util.ResumableTask) Partition(io.prestosql.plugin.hive.metastore.Partition) ReentrantReadWriteLock(java.util.concurrent.locks.ReentrantReadWriteLock) Supplier(java.util.function.Supplier) ArrayList(java.util.ArrayList) HiveFileIterator(io.prestosql.plugin.hive.util.HiveFileIterator) MetastoreUtil.getPartitionLocation(io.prestosql.plugin.hive.metastore.MetastoreUtil.getPartitionLocation) Futures.immediateFuture(com.google.common.util.concurrent.Futures.immediateFuture) Properties(java.util.Properties) Executor(java.util.concurrent.Executor) TypeManager(io.prestosql.spi.type.TypeManager) HoodieROTablePathFilter(org.apache.hudi.hadoop.HoodieROTablePathFilter) IOException(java.io.IOException) InputStreamReader(java.io.InputStreamReader) ValidCompactorWriteIdList(org.apache.hadoop.hive.common.ValidCompactorWriteIdList) MetastoreUtil.getHiveSchema(io.prestosql.plugin.hive.metastore.MetastoreUtil.getHiveSchema) ColumnHandle(io.prestosql.spi.connector.ColumnHandle) InputSplit(org.apache.hadoop.mapred.InputSplit) ConfigurationUtils(io.prestosql.plugin.hive.util.ConfigurationUtils) BufferedReader(java.io.BufferedReader) OrcFile(org.apache.hadoop.hive.ql.io.orc.OrcFile) ValidWriteIdList(org.apache.hadoop.hive.common.ValidWriteIdList) DynamicFilter(io.prestosql.spi.dynamicfilter.DynamicFilter) QueryType(io.prestosql.spi.resourcegroups.QueryType) HdfsContext(io.prestosql.plugin.hive.HdfsEnvironment.HdfsContext) HoodieParquetRealtimeInputFormat(org.apache.hudi.hadoop.realtime.HoodieParquetRealtimeInputFormat) IntPredicate(java.util.function.IntPredicate) Preconditions.checkArgument(com.google.common.base.Preconditions.checkArgument) ConnectorSession(io.prestosql.spi.connector.ConnectorSession) CharStreams(com.google.common.io.CharStreams) InputFormat(org.apache.hadoop.mapred.InputFormat) Path(org.apache.hadoop.fs.Path) Reader(org.apache.hadoop.hive.ql.io.orc.Reader) PrestoException(io.prestosql.spi.PrestoException) HiveSessionProperties.isDynamicFilteringSplitFilteringEnabled(io.prestosql.plugin.hive.HiveSessionProperties.isDynamicFilteringSplitFilteringEnabled) FileInputFormat(org.apache.hadoop.mapred.FileInputFormat) LocatedFileStatus(org.apache.hadoop.fs.LocatedFileStatus) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) Streams(com.google.common.collect.Streams) Collectors(java.util.stream.Collectors) String.format(java.lang.String.format) 
Preconditions.checkState(com.google.common.base.Preconditions.checkState) RECURSE(io.prestosql.plugin.hive.util.HiveFileIterator.NestedDirectoryPolicy.RECURSE) List(java.util.List) Annotation(java.lang.annotation.Annotation) HIDDEN_FILES_PATH_FILTER(org.apache.hadoop.hive.common.FileUtils.HIDDEN_FILES_PATH_FILTER) Optional(java.util.Optional) Math.max(java.lang.Math.max) NOT_SUPPORTED(io.prestosql.spi.StandardErrorCode.NOT_SUPPORTED) Pattern(java.util.regex.Pattern) IGNORED(io.prestosql.plugin.hive.util.HiveFileIterator.NestedDirectoryPolicy.IGNORED) AcidUtils(org.apache.hadoop.hive.ql.io.AcidUtils) TextInputFormat(org.apache.hadoop.mapred.TextInputFormat) ListenableFuture(com.google.common.util.concurrent.ListenableFuture) Logger(io.airlift.log.Logger) PathFilter(org.apache.hadoop.fs.PathFilter) Deque(java.util.Deque) OptionalInt(java.util.OptionalInt) HiveUtil.getInputFormat(io.prestosql.plugin.hive.HiveUtil.getInputFormat) Iterators(com.google.common.collect.Iterators) SymlinkTextInputFormat(org.apache.hadoop.hive.ql.io.SymlinkTextInputFormat) ImmutableList(com.google.common.collect.ImmutableList) HiveUtil.checkCondition(io.prestosql.plugin.hive.HiveUtil.checkCondition) Objects.requireNonNull(java.util.Objects.requireNonNull) Suppliers(com.google.common.base.Suppliers) HoodieParquetInputFormat(org.apache.hudi.hadoop.HoodieParquetInputFormat) S3SelectPushdown.shouldEnablePushdownForTable(io.prestosql.plugin.hive.S3SelectPushdown.shouldEnablePushdownForTable) Ref(org.apache.hive.common.util.Ref) Iterator(java.util.Iterator) ResumableTasks(io.prestosql.plugin.hive.util.ResumableTasks) TupleDomain(io.prestosql.spi.predicate.TupleDomain) BucketConversion(io.prestosql.plugin.hive.HiveSplit.BucketConversion) NestedDirectoryNotAllowedException(io.prestosql.plugin.hive.util.HiveFileIterator.NestedDirectoryNotAllowedException) HiveUtil.isPartitionFiltered(io.prestosql.plugin.hive.HiveUtil.isPartitionFiltered) HiveUtil.getBucketNumber(io.prestosql.plugin.hive.HiveUtil.getBucketNumber) ConcurrentLinkedDeque(java.util.concurrent.ConcurrentLinkedDeque) JobConf(org.apache.hadoop.mapred.JobConf) Column(io.prestosql.plugin.hive.metastore.Column) FileSystem(org.apache.hadoop.fs.FileSystem) InternalHiveSplitFactory(io.prestosql.plugin.hive.util.InternalHiveSplitFactory) Range(io.prestosql.plugin.hive.HiveVacuumTableHandle.Range) HadoopShims(org.apache.hadoop.hive.shims.HadoopShims) TupleDomain(io.prestosql.spi.predicate.TupleDomain) AcidUtils(org.apache.hadoop.hive.ql.io.AcidUtils) HoodieROTablePathFilter(org.apache.hudi.hadoop.HoodieROTablePathFilter) PathFilter(org.apache.hadoop.fs.PathFilter) InputStreamReader(java.io.InputStreamReader) BufferedReader(java.io.BufferedReader) Reader(org.apache.hadoop.hive.ql.io.orc.Reader) PrestoException(io.prestosql.spi.PrestoException) Properties(java.util.Properties) SymlinkTextInputFormat(org.apache.hadoop.hive.ql.io.SymlinkTextInputFormat) BucketingVersion(io.prestosql.plugin.hive.HiveBucketing.BucketingVersion) InputSplit(org.apache.hadoop.mapred.InputSplit) Path(org.apache.hadoop.fs.Path) OptionalInt(java.util.OptionalInt) IOException(java.io.IOException) PrestoException(io.prestosql.spi.PrestoException) NestedDirectoryNotAllowedException(io.prestosql.plugin.hive.util.HiveFileIterator.NestedDirectoryNotAllowedException) ValidCompactorWriteIdList(org.apache.hadoop.hive.common.ValidCompactorWriteIdList) TextInputFormat(org.apache.hadoop.mapred.TextInputFormat) SymlinkTextInputFormat(org.apache.hadoop.hive.ql.io.SymlinkTextInputFormat) 
ValidWriteIdList(org.apache.hadoop.hive.common.ValidWriteIdList) BucketConversion(io.prestosql.plugin.hive.HiveSplit.BucketConversion)
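
One of the denser steps in loadPartition is the bucket-count comparison: a BucketConversion is created only when the partition was written with a bucket count different from the count the table is read with, and worker participation is required only when reading with more buckets than the partition has. The sketch below restates that decision on plain ints; BucketConversionSketch and its Conversion record are hypothetical, while the real code builds io.prestosql.plugin.hive.HiveSplit.BucketConversion from HiveBucketProperty and BucketSplitInfo.

import java.util.Optional;

// Sketch of the bucket-conversion decision in loadPartition.
public class BucketConversionSketch {
    record Conversion(int readBucketCount, int partitionBucketCount, boolean workerParticipation) {}

    static Optional<Conversion> decide(int readBucketCount, int partitionBucketCount) {
        if (readBucketCount == partitionBucketCount) {
            return Optional.empty(); // bucket layouts match, splits need no conversion
        }
        // counts differ: splits carry a conversion, and workers must participate
        // when the read bucket count exceeds the partition's bucket count
        return Optional.of(new Conversion(readBucketCount, partitionBucketCount, readBucketCount > partitionBucketCount));
    }

    public static void main(String[] args) {
        System.out.println(decide(8, 8));  // Optional.empty
        System.out.println(decide(16, 8)); // conversion with workerParticipation=true
    }
}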

Example 35 with Partition

use of io.prestosql.plugin.hive.metastore.Partition in project hetu-core by openlookeng.

the class FileHiveMetastore method alterPartition.

@Override
public synchronized void alterPartition(HiveIdentity identity, String databaseName, String tableName, PartitionWithStatistics partitionWithStatistics) {
    Table table = getRequiredTable(databaseName, tableName);
    Partition partition = partitionWithStatistics.getPartition();
    verifiedPartition(table, partition);
    Path partitionMetadataDirectory = getPartitionMetadataDirectory(table, partition.getValues());
    writeSchemaFile("partition", partitionMetadataDirectory, partitionCodec, new PartitionMetadata(table, partitionWithStatistics), true);
}
Also used : Path(org.apache.hadoop.fs.Path) HivePartition(io.prestosql.plugin.hive.HivePartition) Partition(io.prestosql.plugin.hive.metastore.Partition) Table(io.prestosql.plugin.hive.metastore.Table)
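
FileHiveMetastore stores each partition's metadata as a small schema file inside a directory derived from the table's metadata directory and the partition values. The sketch below illustrates one plausible "<table dir>/<column>=<value>/..." layout; PartitionDirectorySketch is a hypothetical name, and the exact layout and escaping are defined by the real getPartitionMetadataDirectory, not by this example.

import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.List;

// Illustrative layout only: map partition column names and values to a metadata directory.
public class PartitionDirectorySketch {
    static Path partitionMetadataDirectory(Path tableMetadataDirectory, List<String> partitionColumns, List<String> partitionValues) {
        Path dir = tableMetadataDirectory;
        for (int i = 0; i < partitionColumns.size(); i++) {
            dir = dir.resolve(partitionColumns.get(i) + "=" + partitionValues.get(i));
        }
        return dir;
    }

    public static void main(String[] args) {
        Path dir = partitionMetadataDirectory(
                Paths.get("/metastore/testSchema/table_name"),
                List.of("ds", "country"),
                List.of("2020-01-01", "US"));
        System.out.println(dir); // /metastore/testSchema/table_name/ds=2020-01-01/country=US
    }
}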

Aggregations

Partition (io.prestosql.plugin.hive.metastore.Partition): 62
PrestoException (io.prestosql.spi.PrestoException): 42
Table (io.prestosql.plugin.hive.metastore.Table): 41
Optional (java.util.Optional): 37
ImmutableMap (com.google.common.collect.ImmutableMap): 33
List (java.util.List): 33
Map (java.util.Map): 33
Path (org.apache.hadoop.fs.Path): 33
ImmutableList (com.google.common.collect.ImmutableList): 31
HiveIdentity (io.prestosql.plugin.hive.authentication.HiveIdentity): 29
Column (io.prestosql.plugin.hive.metastore.Column): 29
ConnectorSession (io.prestosql.spi.connector.ConnectorSession): 29
SchemaTableName (io.prestosql.spi.connector.SchemaTableName): 29
HdfsContext (io.prestosql.plugin.hive.HdfsEnvironment.HdfsContext): 28
Objects.requireNonNull (java.util.Objects.requireNonNull): 27
Set (java.util.Set): 27
ImmutableMap.toImmutableMap (com.google.common.collect.ImmutableMap.toImmutableMap): 26
ImmutableSet (com.google.common.collect.ImmutableSet): 25
NOT_SUPPORTED (io.prestosql.spi.StandardErrorCode.NOT_SUPPORTED): 25
TableNotFoundException (io.prestosql.spi.connector.TableNotFoundException): 25