Search in sources :

Example 26 with PrestoException

use of com.facebook.presto.spi.PrestoException in project presto by prestodb.

the class HiveMetadata method beginInsert.

@Override
public HiveInsertTableHandle beginInsert(ConnectorSession session, ConnectorTableHandle tableHandle) {
    verifyJvmTimeZone();
    SchemaTableName tableName = schemaTableName(tableHandle);
    Optional<Table> table = metastore.getTable(tableName.getSchemaName(), tableName.getTableName());
    if (!table.isPresent()) {
        throw new TableNotFoundException(tableName);
    }
    checkTableIsWritable(table.get());
    for (Column column : table.get().getDataColumns()) {
        if (!isWritableType(column.getType())) {
            throw new PrestoException(NOT_SUPPORTED, format("Inserting into Hive table %s.%s with column type %s not supported", table.get().getDatabaseName(), table.get().getTableName(), column.getType()));
        }
    }
    List<HiveColumnHandle> handles = hiveColumnHandles(connectorId, table.get()).stream().filter(columnHandle -> !columnHandle.isHidden()).collect(toList());
    HiveStorageFormat tableStorageFormat = extractHiveStorageFormat(table.get());
    LocationHandle locationHandle = locationService.forExistingTable(metastore, session.getUser(), session.getQueryId(), table.get());
    HiveInsertTableHandle result = new HiveInsertTableHandle(connectorId, tableName.getSchemaName(), tableName.getTableName(), handles, session.getQueryId(), metastore.generatePageSinkMetadata(tableName), locationHandle, table.get().getStorage().getBucketProperty(), tableStorageFormat, respectTableFormat ? tableStorageFormat : defaultStorageFormat);
    Optional<Path> writePathRoot = locationService.writePathRoot(locationHandle);
    Path targetPathRoot = locationService.targetPathRoot(locationHandle);
    if (writePathRoot.isPresent()) {
        WriteMode mode = writePathRoot.get().equals(targetPathRoot) ? DIRECT_TO_TARGET_NEW_DIRECTORY : STAGE_AND_MOVE_TO_TARGET_DIRECTORY;
        metastore.declareIntentionToWrite(session, mode, writePathRoot.get(), result.getFilePrefix(), tableName);
    } else {
        metastore.declareIntentionToWrite(session, DIRECT_TO_TARGET_EXISTING_DIRECTORY, targetPathRoot, result.getFilePrefix(), tableName);
    }
    return result;
}
Also used : DateTimeZone(org.joda.time.DateTimeZone) TypeManager(com.facebook.presto.spi.type.TypeManager) HiveTableProperties.getPartitionedBy(com.facebook.presto.hive.HiveTableProperties.getPartitionedBy) Map(java.util.Map) INVALID_TABLE_PROPERTY(com.facebook.presto.spi.StandardErrorCode.INVALID_TABLE_PROPERTY) HivePartitionManager.extractPartitionKeyValues(com.facebook.presto.hive.HivePartitionManager.extractPartitionKeyValues) WriteMode(com.facebook.presto.hive.metastore.SemiTransactionalHiveMetastore.WriteMode) Verify(com.google.common.base.Verify) StorageFormat(com.facebook.presto.hive.metastore.StorageFormat) HIVE_INVALID_METADATA(com.facebook.presto.hive.HiveErrorCode.HIVE_INVALID_METADATA) PrincipalPrivileges(com.facebook.presto.hive.metastore.PrincipalPrivileges) PATH_COLUMN_NAME(com.facebook.presto.hive.HiveColumnHandle.PATH_COLUMN_NAME) Set(java.util.Set) HivePrivilegeInfo.toHivePrivilege(com.facebook.presto.hive.metastore.HivePrivilegeInfo.toHivePrivilege) SemiTransactionalHiveMetastore(com.facebook.presto.hive.metastore.SemiTransactionalHiveMetastore) MANAGED_TABLE(org.apache.hadoop.hive.metastore.TableType.MANAGED_TABLE) ConnectorSession(com.facebook.presto.spi.ConnectorSession) Domain(com.facebook.presto.spi.predicate.Domain) HiveColumnHandle.updateRowIdHandle(com.facebook.presto.hive.HiveColumnHandle.updateRowIdHandle) HivePrivilegeInfo(com.facebook.presto.hive.metastore.HivePrivilegeInfo) MoreObjects.toStringHelper(com.google.common.base.MoreObjects.toStringHelper) Iterables(com.google.common.collect.Iterables) Table(com.facebook.presto.hive.metastore.Table) Slice(io.airlift.slice.Slice) Database(com.facebook.presto.hive.metastore.Database) REGULAR(com.facebook.presto.hive.HiveColumnHandle.ColumnType.REGULAR) BUCKET_COLUMN_NAME(com.facebook.presto.hive.HiveColumnHandle.BUCKET_COLUMN_NAME) HiveUtil.columnExtraInfo(com.facebook.presto.hive.HiveUtil.columnExtraInfo) HiveTableProperties.getBucketProperty(com.facebook.presto.hive.HiveTableProperties.getBucketProperty) ConnectorOutputTableHandle(com.facebook.presto.spi.ConnectorOutputTableHandle) ArrayList(java.util.ArrayList) OptionalLong(java.util.OptionalLong) DIRECT_TO_TARGET_EXISTING_DIRECTORY(com.facebook.presto.hive.metastore.SemiTransactionalHiveMetastore.WriteMode.DIRECT_TO_TARGET_EXISTING_DIRECTORY) Type(com.facebook.presto.spi.type.Type) HiveUtil.encodeViewData(com.facebook.presto.hive.HiveUtil.encodeViewData) ImmutableMultimap(com.google.common.collect.ImmutableMultimap) HiveTableProperties.getHiveStorageFormat(com.facebook.presto.hive.HiveTableProperties.getHiveStorageFormat) HiveWriteUtils.checkTableIsWritable(com.facebook.presto.hive.HiveWriteUtils.checkTableIsWritable) ConnectorTableMetadata(com.facebook.presto.spi.ConnectorTableMetadata) PRESTO_VIEW_FLAG(com.facebook.presto.hive.HiveUtil.PRESTO_VIEW_FLAG) Properties(java.util.Properties) HiveWriteUtils.initializeSerializer(com.facebook.presto.hive.HiveWriteUtils.initializeSerializer) HIVE_CONCURRENT_MODIFICATION_DETECTED(com.facebook.presto.hive.HiveErrorCode.HIVE_CONCURRENT_MODIFICATION_DETECTED) Constraint(com.facebook.presto.spi.Constraint) IOException(java.io.IOException) ConnectorTableLayoutResult(com.facebook.presto.spi.ConnectorTableLayoutResult) HivePrivilege(com.facebook.presto.hive.metastore.HivePrivilegeInfo.HivePrivilege) SchemaTablePrefix(com.facebook.presto.spi.SchemaTablePrefix) ColumnHandle(com.facebook.presto.spi.ColumnHandle) ConnectorNewTableLayout(com.facebook.presto.spi.ConnectorNewTableLayout) TableType(org.apache.hadoop.hive.metastore.TableType) HiveUtil.hiveColumnHandles(com.facebook.presto.hive.HiveUtil.hiveColumnHandles) DIRECT_TO_TARGET_NEW_DIRECTORY(com.facebook.presto.hive.metastore.SemiTransactionalHiveMetastore.WriteMode.DIRECT_TO_TARGET_NEW_DIRECTORY) ConnectorViewDefinition(com.facebook.presto.spi.ConnectorViewDefinition) ViewNotFoundException(com.facebook.presto.spi.ViewNotFoundException) HiveTableProperties.getExternalLocation(com.facebook.presto.hive.HiveTableProperties.getExternalLocation) EXTERNAL_TABLE(org.apache.hadoop.hive.metastore.TableType.EXTERNAL_TABLE) HiveType.toHiveType(com.facebook.presto.hive.HiveType.toHiveType) STAGE_AND_MOVE_TO_TARGET_DIRECTORY(com.facebook.presto.hive.metastore.SemiTransactionalHiveMetastore.WriteMode.STAGE_AND_MOVE_TO_TARGET_DIRECTORY) Privilege(com.facebook.presto.spi.security.Privilege) FileSinkOperator(org.apache.hadoop.hive.ql.exec.FileSinkOperator) Preconditions.checkArgument(com.google.common.base.Preconditions.checkArgument) SchemaTableName(com.facebook.presto.spi.SchemaTableName) HiveSessionProperties.isBucketExecutionEnabled(com.facebook.presto.hive.HiveSessionProperties.isBucketExecutionEnabled) Collectors.toMap(java.util.stream.Collectors.toMap) Iterables.concat(com.google.common.collect.Iterables.concat) HIVE_COLUMN_ORDER_MISMATCH(com.facebook.presto.hive.HiveErrorCode.HIVE_COLUMN_ORDER_MISMATCH) BUCKETED_BY_PROPERTY(com.facebook.presto.hive.HiveTableProperties.BUCKETED_BY_PROPERTY) Path(org.apache.hadoop.fs.Path) DiscretePredicates(com.facebook.presto.spi.DiscretePredicates) NullableValue(com.facebook.presto.spi.predicate.NullableValue) Collectors.toSet(java.util.stream.Collectors.toSet) HIVE_UNSUPPORTED_FORMAT(com.facebook.presto.hive.HiveErrorCode.HIVE_UNSUPPORTED_FORMAT) ImmutableSet(com.google.common.collect.ImmutableSet) HIVE_UNKNOWN_ERROR(com.facebook.presto.hive.HiveErrorCode.HIVE_UNKNOWN_ERROR) ImmutableMap(com.google.common.collect.ImmutableMap) Predicate(java.util.function.Predicate) INVALID_SCHEMA_PROPERTY(com.facebook.presto.spi.StandardErrorCode.INVALID_SCHEMA_PROPERTY) Collections.emptyList(java.util.Collections.emptyList) Collection(java.util.Collection) SCHEMA_NOT_EMPTY(com.facebook.presto.spi.StandardErrorCode.SCHEMA_NOT_EMPTY) Collectors(java.util.stream.Collectors) Sets(com.google.common.collect.Sets) String.format(java.lang.String.format) STORAGE_FORMAT_PROPERTY(com.facebook.presto.hive.HiveTableProperties.STORAGE_FORMAT_PROPERTY) TupleDomain(com.facebook.presto.spi.predicate.TupleDomain) HIVE_STRING(com.facebook.presto.hive.HiveType.HIVE_STRING) List(java.util.List) ColumnMetadata(com.facebook.presto.spi.ColumnMetadata) NOT_SUPPORTED(com.facebook.presto.spi.StandardErrorCode.NOT_SUPPORTED) USER(com.facebook.presto.hive.metastore.PrincipalType.USER) Optional(java.util.Optional) HIVE_TIMEZONE_MISMATCH(com.facebook.presto.hive.HiveErrorCode.HIVE_TIMEZONE_MISMATCH) PARTITION_KEY(com.facebook.presto.hive.HiveColumnHandle.ColumnType.PARTITION_KEY) HiveUtil.schemaTableName(com.facebook.presto.hive.HiveUtil.schemaTableName) HIDDEN(com.facebook.presto.hive.HiveColumnHandle.ColumnType.HIDDEN) JsonCodec(io.airlift.json.JsonCodec) TupleDomain.withColumnDomains(com.facebook.presto.spi.predicate.TupleDomain.withColumnDomains) ConnectorMetadata(com.facebook.presto.spi.connector.ConnectorMetadata) Column(com.facebook.presto.hive.metastore.Column) ConnectorTableLayoutHandle(com.facebook.presto.spi.ConnectorTableLayoutHandle) HIVE_WRITER_CLOSE_ERROR(com.facebook.presto.hive.HiveErrorCode.HIVE_WRITER_CLOSE_ERROR) ConnectorTableHandle(com.facebook.presto.spi.ConnectorTableHandle) PrestoException(com.facebook.presto.spi.PrestoException) Function(java.util.function.Function) Partition(com.facebook.presto.hive.metastore.Partition) ImmutableList(com.google.common.collect.ImmutableList) PARTITIONED_BY_PROPERTY(com.facebook.presto.hive.HiveTableProperties.PARTITIONED_BY_PROPERTY) Verify.verify(com.google.common.base.Verify.verify) Objects.requireNonNull(java.util.Objects.requireNonNull) HiveUtil.toPartitionValues(com.facebook.presto.hive.HiveUtil.toPartitionValues) HiveWriteUtils.isWritableType(com.facebook.presto.hive.HiveWriteUtils.isWritableType) ConnectorNodePartitioning(com.facebook.presto.spi.ConnectorNodePartitioning) ConnectorInsertTableHandle(com.facebook.presto.spi.ConnectorInsertTableHandle) ConnectorOutputMetadata(com.facebook.presto.spi.connector.ConnectorOutputMetadata) ConnectorTableLayout(com.facebook.presto.spi.ConnectorTableLayout) EXTERNAL_LOCATION_PROPERTY(com.facebook.presto.hive.HiveTableProperties.EXTERNAL_LOCATION_PROPERTY) VIEW_STORAGE_FORMAT(com.facebook.presto.hive.metastore.StorageFormat.VIEW_STORAGE_FORMAT) Maps(com.google.common.collect.Maps) JobConf(org.apache.hadoop.mapred.JobConf) Collectors.toList(java.util.stream.Collectors.toList) TableNotFoundException(com.facebook.presto.spi.TableNotFoundException) BUCKET_COUNT_PROPERTY(com.facebook.presto.hive.HiveTableProperties.BUCKET_COUNT_PROPERTY) StorageFormat.fromHiveStorageFormat(com.facebook.presto.hive.metastore.StorageFormat.fromHiveStorageFormat) HiveUtil.decodeViewData(com.facebook.presto.hive.HiveUtil.decodeViewData) VisibleForTesting(com.google.common.annotations.VisibleForTesting) MetastoreUtil.getHiveSchema(com.facebook.presto.hive.metastore.MetastoreUtil.getHiveSchema) Path(org.apache.hadoop.fs.Path) Table(com.facebook.presto.hive.metastore.Table) PrestoException(com.facebook.presto.spi.PrestoException) SchemaTableName(com.facebook.presto.spi.SchemaTableName) TableNotFoundException(com.facebook.presto.spi.TableNotFoundException) Column(com.facebook.presto.hive.metastore.Column) HiveTableProperties.getHiveStorageFormat(com.facebook.presto.hive.HiveTableProperties.getHiveStorageFormat) StorageFormat.fromHiveStorageFormat(com.facebook.presto.hive.metastore.StorageFormat.fromHiveStorageFormat) WriteMode(com.facebook.presto.hive.metastore.SemiTransactionalHiveMetastore.WriteMode)

Example 27 with PrestoException

use of com.facebook.presto.spi.PrestoException in project presto by prestodb.

the class HivePartitionManager method getPartitions.

public HivePartitionResult getPartitions(SemiTransactionalHiveMetastore metastore, ConnectorTableHandle tableHandle, Constraint<ColumnHandle> constraint) {
    HiveTableHandle hiveTableHandle = (HiveTableHandle) tableHandle;
    TupleDomain<ColumnHandle> effectivePredicate = constraint.getSummary();
    SchemaTableName tableName = hiveTableHandle.getSchemaTableName();
    Table table = getTable(metastore, tableName);
    Optional<HiveBucketHandle> hiveBucketHandle = getHiveBucketHandle(connectorId, table);
    List<HiveColumnHandle> partitionColumns = getPartitionKeyColumnHandles(connectorId, table);
    List<HiveBucket> buckets = getHiveBucketNumbers(table, effectivePredicate);
    TupleDomain<HiveColumnHandle> compactEffectivePredicate = toCompactTupleDomain(effectivePredicate, domainCompactionThreshold);
    if (effectivePredicate.isNone()) {
        return new HivePartitionResult(partitionColumns, ImmutableList.of(), TupleDomain.none(), TupleDomain.none(), hiveBucketHandle);
    }
    if (partitionColumns.isEmpty()) {
        return new HivePartitionResult(partitionColumns, ImmutableList.of(new HivePartition(tableName, compactEffectivePredicate, buckets)), effectivePredicate, TupleDomain.none(), hiveBucketHandle);
    }
    List<Type> partitionTypes = partitionColumns.stream().map(column -> typeManager.getType(column.getTypeSignature())).collect(toList());
    List<String> partitionNames = getFilteredPartitionNames(metastore, tableName, partitionColumns, effectivePredicate);
    // do a final pass to filter based on fields that could not be used to filter the partitions
    int partitionCount = 0;
    ImmutableList.Builder<HivePartition> partitions = ImmutableList.builder();
    for (String partitionName : partitionNames) {
        Optional<Map<ColumnHandle, NullableValue>> values = parseValuesAndFilterPartition(partitionName, partitionColumns, partitionTypes, constraint);
        if (values.isPresent()) {
            if (partitionCount == maxPartitions) {
                throw new PrestoException(HIVE_EXCEEDED_PARTITION_LIMIT, format("Query over table '%s' can potentially read more than %s partitions", hiveTableHandle.getSchemaTableName(), maxPartitions));
            }
            partitionCount++;
            partitions.add(new HivePartition(tableName, compactEffectivePredicate, partitionName, values.get(), buckets));
        }
    }
    // All partition key domains will be fully evaluated, so we don't need to include those
    TupleDomain<ColumnHandle> remainingTupleDomain = TupleDomain.withColumnDomains(Maps.filterKeys(effectivePredicate.getDomains().get(), not(Predicates.in(partitionColumns))));
    TupleDomain<ColumnHandle> enforcedTupleDomain = TupleDomain.withColumnDomains(Maps.filterKeys(effectivePredicate.getDomains().get(), Predicates.in(partitionColumns)));
    return new HivePartitionResult(partitionColumns, partitions.build(), remainingTupleDomain, enforcedTupleDomain, hiveBucketHandle);
}
Also used : DateTimeZone(org.joda.time.DateTimeZone) Table(com.facebook.presto.hive.metastore.Table) TypeManager(com.facebook.presto.spi.type.TypeManager) Slice(io.airlift.slice.Slice) HiveUtil.getPartitionKeyColumnHandles(com.facebook.presto.hive.HiveUtil.getPartitionKeyColumnHandles) Strings.isNullOrEmpty(com.google.common.base.Strings.isNullOrEmpty) ConnectorTableHandle(com.facebook.presto.spi.ConnectorTableHandle) ProtectMode(org.apache.hadoop.hive.metastore.ProtectMode) PrestoException(com.facebook.presto.spi.PrestoException) ArrayList(java.util.ArrayList) Inject(javax.inject.Inject) Preconditions.checkArgument(com.google.common.base.Preconditions.checkArgument) SchemaTableName(com.facebook.presto.spi.SchemaTableName) ImmutableList(com.google.common.collect.ImmutableList) Predicates.not(com.google.common.base.Predicates.not) ValueSet(com.facebook.presto.spi.predicate.ValueSet) Type(com.facebook.presto.spi.type.Type) Map(java.util.Map) Objects.requireNonNull(java.util.Objects.requireNonNull) Predicates(com.google.common.base.Predicates) HiveBucket(com.facebook.presto.hive.HiveBucketing.HiveBucket) NullableValue(com.facebook.presto.spi.predicate.NullableValue) HIVE_EXCEEDED_PARTITION_LIMIT(com.facebook.presto.hive.HiveErrorCode.HIVE_EXCEEDED_PARTITION_LIMIT) ImmutableMap(com.google.common.collect.ImmutableMap) ProtectMode.getProtectModeFromString(org.apache.hadoop.hive.metastore.ProtectMode.getProtectModeFromString) Constraint(com.facebook.presto.spi.Constraint) SemiTransactionalHiveMetastore(com.facebook.presto.hive.metastore.SemiTransactionalHiveMetastore) HiveBucketing.getHiveBucketHandle(com.facebook.presto.hive.HiveBucketing.getHiveBucketHandle) Maps(com.google.common.collect.Maps) String.format(java.lang.String.format) TupleDomain(com.facebook.presto.spi.predicate.TupleDomain) Domain(com.facebook.presto.spi.predicate.Domain) List(java.util.List) Collectors.toList(java.util.stream.Collectors.toList) TableNotFoundException(com.facebook.presto.spi.TableNotFoundException) NOT_SUPPORTED(com.facebook.presto.spi.StandardErrorCode.NOT_SUPPORTED) ColumnHandle(com.facebook.presto.spi.ColumnHandle) HiveBucketing.getHiveBucketNumbers(com.facebook.presto.hive.HiveBucketing.getHiveBucketNumbers) FileUtils(org.apache.hadoop.hive.common.FileUtils) Optional(java.util.Optional) HiveUtil.parsePartitionValue(com.facebook.presto.hive.HiveUtil.parsePartitionValue) HiveBucket(com.facebook.presto.hive.HiveBucketing.HiveBucket) ImmutableList(com.google.common.collect.ImmutableList) PrestoException(com.facebook.presto.spi.PrestoException) ProtectMode.getProtectModeFromString(org.apache.hadoop.hive.metastore.ProtectMode.getProtectModeFromString) ColumnHandle(com.facebook.presto.spi.ColumnHandle) Table(com.facebook.presto.hive.metastore.Table) SchemaTableName(com.facebook.presto.spi.SchemaTableName) Constraint(com.facebook.presto.spi.Constraint) Type(com.facebook.presto.spi.type.Type) HiveBucketing.getHiveBucketHandle(com.facebook.presto.hive.HiveBucketing.getHiveBucketHandle) Map(java.util.Map) ImmutableMap(com.google.common.collect.ImmutableMap)

Example 28 with PrestoException

use of com.facebook.presto.spi.PrestoException in project presto by prestodb.

the class HivePartitionManager method getFilteredPartitionNames.

private List<String> getFilteredPartitionNames(SemiTransactionalHiveMetastore metastore, SchemaTableName tableName, List<HiveColumnHandle> partitionKeys, TupleDomain<ColumnHandle> effectivePredicate) {
    checkArgument(effectivePredicate.getDomains().isPresent());
    List<String> filter = new ArrayList<>();
    for (HiveColumnHandle partitionKey : partitionKeys) {
        Domain domain = effectivePredicate.getDomains().get().get(partitionKey);
        if (domain != null && domain.isNullableSingleValue()) {
            Object value = domain.getNullableSingleValue();
            if (value == null) {
                filter.add(HivePartitionKey.HIVE_DEFAULT_DYNAMIC_PARTITION);
            } else if (value instanceof Slice) {
                filter.add(((Slice) value).toStringUtf8());
            } else if ((value instanceof Boolean) || (value instanceof Double) || (value instanceof Long)) {
                if (assumeCanonicalPartitionKeys) {
                    filter.add(value.toString());
                } else {
                    // Hive treats '0', 'false', and 'False' the same. However, the metastore differentiates between these.
                    filter.add(PARTITION_VALUE_WILDCARD);
                }
            } else {
                throw new PrestoException(NOT_SUPPORTED, "Only Boolean, Double and Long partition keys are supported");
            }
        } else {
            filter.add(PARTITION_VALUE_WILDCARD);
        }
    }
    // fetch the partition names
    return metastore.getPartitionNamesByParts(tableName.getSchemaName(), tableName.getTableName(), filter).orElseThrow(() -> new TableNotFoundException(tableName));
}
Also used : TableNotFoundException(com.facebook.presto.spi.TableNotFoundException) Slice(io.airlift.slice.Slice) ArrayList(java.util.ArrayList) PrestoException(com.facebook.presto.spi.PrestoException) ProtectMode.getProtectModeFromString(org.apache.hadoop.hive.metastore.ProtectMode.getProtectModeFromString) TupleDomain(com.facebook.presto.spi.predicate.TupleDomain) Domain(com.facebook.presto.spi.predicate.Domain)

Example 29 with PrestoException

use of com.facebook.presto.spi.PrestoException in project presto by prestodb.

the class BackgroundHiveSplitLoader method listAndSortBucketFiles.

private static List<LocatedFileStatus> listAndSortBucketFiles(HiveFileIterator hiveFileIterator, int bucketCount) {
    ArrayList<LocatedFileStatus> list = new ArrayList<>(bucketCount);
    while (hiveFileIterator.hasNext()) {
        LocatedFileStatus next = hiveFileIterator.next();
        if (isDirectory(next)) {
            // Fail here to be on the safe side. This seems to be the same as what Hive does
            throw new PrestoException(HIVE_INVALID_BUCKET_FILES, format("%s Found sub-directory in bucket directory for partition: %s", CORRUPT_BUCKETING, hiveFileIterator.getPartitionName()));
        }
        list.add(next);
    }
    if (list.size() != bucketCount) {
        throw new PrestoException(HIVE_INVALID_BUCKET_FILES, format("%s The number of files in the directory (%s) does not match the declared bucket count (%s) for partition: %s", CORRUPT_BUCKETING, list.size(), bucketCount, hiveFileIterator.getPartitionName()));
    }
    // Sort FileStatus objects (instead of, e.g., fileStatus.getPath().toString). This matches org.apache.hadoop.hive.ql.metadata.Table.getSortedPaths
    list.sort(null);
    return list;
}
Also used : ArrayList(java.util.ArrayList) LocatedFileStatus(org.apache.hadoop.fs.LocatedFileStatus) PrestoException(com.facebook.presto.spi.PrestoException)

Example 30 with PrestoException

use of com.facebook.presto.spi.PrestoException in project presto by prestodb.

the class BackgroundHiveSplitLoader method loadPartition.

private void loadPartition(HivePartitionMetadata partition) throws IOException {
    String partitionName = partition.getHivePartition().getPartitionId();
    Properties schema = getPartitionSchema(table, partition.getPartition());
    List<HivePartitionKey> partitionKeys = getPartitionKeys(table, partition.getPartition());
    TupleDomain<HiveColumnHandle> effectivePredicate = partition.getHivePartition().getEffectivePredicate();
    Path path = new Path(getPartitionLocation(table, partition.getPartition()));
    Configuration configuration = hdfsEnvironment.getConfiguration(path);
    InputFormat<?, ?> inputFormat = getInputFormat(configuration, schema, false);
    FileSystem fs = hdfsEnvironment.getFileSystem(session.getUser(), path);
    if (inputFormat instanceof SymlinkTextInputFormat) {
        if (bucketHandle.isPresent()) {
            throw new PrestoException(StandardErrorCode.NOT_SUPPORTED, "Bucketed table in SymlinkTextInputFormat is not yet supported");
        }
        // TODO: This should use an iterator like the HiveFileIterator
        for (Path targetPath : getTargetPathsFromSymlink(fs, path)) {
            // The input should be in TextInputFormat.
            TextInputFormat targetInputFormat = new TextInputFormat();
            // get the configuration for the target path -- it may be a different hdfs instance
            Configuration targetConfiguration = hdfsEnvironment.getConfiguration(targetPath);
            JobConf targetJob = new JobConf(targetConfiguration);
            targetJob.setInputFormat(TextInputFormat.class);
            targetInputFormat.configure(targetJob);
            FileInputFormat.setInputPaths(targetJob, targetPath);
            InputSplit[] targetSplits = targetInputFormat.getSplits(targetJob, 0);
            if (addSplitsToSource(targetSplits, partitionName, partitionKeys, schema, effectivePredicate, partition.getColumnCoercions())) {
                return;
            }
        }
        return;
    }
    // on the input format to obtain file splits.
    if (shouldUseFileSplitsFromInputFormat(inputFormat)) {
        JobConf jobConf = new JobConf(configuration);
        FileInputFormat.setInputPaths(jobConf, path);
        InputSplit[] splits = inputFormat.getSplits(jobConf, 0);
        addSplitsToSource(splits, partitionName, partitionKeys, schema, effectivePredicate, partition.getColumnCoercions());
        return;
    }
    // If only one bucket could match: load that one file
    HiveFileIterator iterator = new HiveFileIterator(path, fs, directoryLister, namenodeStats, partitionName, inputFormat, schema, partitionKeys, effectivePredicate, partition.getColumnCoercions());
    if (!buckets.isEmpty()) {
        int bucketCount = buckets.get(0).getBucketCount();
        List<LocatedFileStatus> list = listAndSortBucketFiles(iterator, bucketCount);
        List<Iterator<HiveSplit>> iteratorList = new ArrayList<>();
        for (HiveBucket bucket : buckets) {
            int bucketNumber = bucket.getBucketNumber();
            LocatedFileStatus file = list.get(bucketNumber);
            boolean splittable = isSplittable(iterator.getInputFormat(), hdfsEnvironment.getFileSystem(session.getUser(), file.getPath()), file.getPath());
            iteratorList.add(createHiveSplitIterator(iterator.getPartitionName(), file.getPath().toString(), file.getBlockLocations(), 0, file.getLen(), iterator.getSchema(), iterator.getPartitionKeys(), splittable, session, OptionalInt.of(bucketNumber), effectivePredicate, partition.getColumnCoercions()));
        }
        addToHiveSplitSourceRoundRobin(iteratorList);
        return;
    }
    // If table is bucketed: list the directory, sort, tag with bucket id
    if (bucketHandle.isPresent()) {
        // HiveFileIterator skips hidden files automatically.
        int bucketCount = bucketHandle.get().getBucketCount();
        List<LocatedFileStatus> list = listAndSortBucketFiles(iterator, bucketCount);
        List<Iterator<HiveSplit>> iteratorList = new ArrayList<>();
        for (int bucketIndex = 0; bucketIndex < bucketCount; bucketIndex++) {
            LocatedFileStatus file = list.get(bucketIndex);
            boolean splittable = isSplittable(iterator.getInputFormat(), hdfsEnvironment.getFileSystem(session.getUser(), file.getPath()), file.getPath());
            iteratorList.add(createHiveSplitIterator(iterator.getPartitionName(), file.getPath().toString(), file.getBlockLocations(), 0, file.getLen(), iterator.getSchema(), iterator.getPartitionKeys(), splittable, session, OptionalInt.of(bucketIndex), iterator.getEffectivePredicate(), partition.getColumnCoercions()));
        }
        addToHiveSplitSourceRoundRobin(iteratorList);
        return;
    }
    fileIterators.addLast(iterator);
}
Also used : HiveBucket(com.facebook.presto.hive.HiveBucketing.HiveBucket) Configuration(org.apache.hadoop.conf.Configuration) ArrayList(java.util.ArrayList) PrestoException(com.facebook.presto.spi.PrestoException) Properties(java.util.Properties) SymlinkTextInputFormat(org.apache.hadoop.hive.ql.io.SymlinkTextInputFormat) FileSystem(org.apache.hadoop.fs.FileSystem) PeekingIterator(com.google.common.collect.PeekingIterator) Iterator(java.util.Iterator) AbstractIterator(com.google.common.collect.AbstractIterator) HiveFileIterator(com.facebook.presto.hive.util.HiveFileIterator) HiveFileIterator(com.facebook.presto.hive.util.HiveFileIterator) JobConf(org.apache.hadoop.mapred.JobConf) InputSplit(org.apache.hadoop.mapred.InputSplit) Path(org.apache.hadoop.fs.Path) LocatedFileStatus(org.apache.hadoop.fs.LocatedFileStatus) TextInputFormat(org.apache.hadoop.mapred.TextInputFormat) SymlinkTextInputFormat(org.apache.hadoop.hive.ql.io.SymlinkTextInputFormat)

Aggregations

PrestoException (com.facebook.presto.spi.PrestoException)747 IOException (java.io.IOException)161 ImmutableList (com.google.common.collect.ImmutableList)106 Type (com.facebook.presto.common.type.Type)95 SchemaTableName (com.facebook.presto.spi.SchemaTableName)88 List (java.util.List)83 ArrayList (java.util.ArrayList)79 Path (org.apache.hadoop.fs.Path)78 Optional (java.util.Optional)68 ImmutableMap (com.google.common.collect.ImmutableMap)64 Map (java.util.Map)64 ImmutableList.toImmutableList (com.google.common.collect.ImmutableList.toImmutableList)58 Block (com.facebook.presto.common.block.Block)55 Table (com.facebook.presto.hive.metastore.Table)55 TableNotFoundException (com.facebook.presto.spi.TableNotFoundException)55 Objects.requireNonNull (java.util.Objects.requireNonNull)55 Slice (io.airlift.slice.Slice)50 SqlType (com.facebook.presto.spi.function.SqlType)49 Test (org.testng.annotations.Test)49 ConnectorSession (com.facebook.presto.spi.ConnectorSession)46