use of com.facebook.presto.spi.PrestoException in project presto by prestodb.
In the class HiveMetadata, method beginInsert:
@Override
public HiveInsertTableHandle beginInsert(ConnectorSession session, ConnectorTableHandle tableHandle)
{
    verifyJvmTimeZone();
    SchemaTableName tableName = schemaTableName(tableHandle);
    Optional<Table> table = metastore.getTable(tableName.getSchemaName(), tableName.getTableName());
    if (!table.isPresent()) {
        throw new TableNotFoundException(tableName);
    }
    checkTableIsWritable(table.get());
    for (Column column : table.get().getDataColumns()) {
        if (!isWritableType(column.getType())) {
            throw new PrestoException(NOT_SUPPORTED, format("Inserting into Hive table %s.%s with column type %s not supported", table.get().getDatabaseName(), table.get().getTableName(), column.getType()));
        }
    }
    List<HiveColumnHandle> handles = hiveColumnHandles(connectorId, table.get()).stream()
            .filter(columnHandle -> !columnHandle.isHidden())
            .collect(toList());
    HiveStorageFormat tableStorageFormat = extractHiveStorageFormat(table.get());
    LocationHandle locationHandle = locationService.forExistingTable(metastore, session.getUser(), session.getQueryId(), table.get());
    HiveInsertTableHandle result = new HiveInsertTableHandle(
            connectorId,
            tableName.getSchemaName(),
            tableName.getTableName(),
            handles,
            session.getQueryId(),
            metastore.generatePageSinkMetadata(tableName),
            locationHandle,
            table.get().getStorage().getBucketProperty(),
            tableStorageFormat,
            respectTableFormat ? tableStorageFormat : defaultStorageFormat);
    Optional<Path> writePathRoot = locationService.writePathRoot(locationHandle);
    Path targetPathRoot = locationService.targetPathRoot(locationHandle);
    if (writePathRoot.isPresent()) {
        WriteMode mode = writePathRoot.get().equals(targetPathRoot) ? DIRECT_TO_TARGET_NEW_DIRECTORY : STAGE_AND_MOVE_TO_TARGET_DIRECTORY;
        metastore.declareIntentionToWrite(session, mode, writePathRoot.get(), result.getFilePrefix(), tableName);
    }
    else {
        metastore.declareIntentionToWrite(session, DIRECT_TO_TARGET_EXISTING_DIRECTORY, targetPathRoot, result.getFilePrefix(), tableName);
    }
    return result;
}
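The NOT_SUPPORTED check above rejects unwritable column types up front, as a user-facing error, instead of letting the write fail later. Below is a minimal, self-contained sketch of the same pattern; it assumes only that presto-spi is on the classpath, and isWritableType here is a hypothetical stand-in for the private HiveMetadata helper:

import com.facebook.presto.spi.PrestoException;

import java.util.Map;

import static com.facebook.presto.spi.StandardErrorCode.NOT_SUPPORTED;
import static java.lang.String.format;

public final class WritableTypeCheckSketch
{
    private WritableTypeCheckSketch() {}

    // Hypothetical stand-in for HiveMetadata.isWritableType
    private static boolean isWritableType(String hiveTypeName)
    {
        return !hiveTypeName.startsWith("interval");
    }

    public static void checkAllColumnsWritable(String database, String table, Map<String, String> columnTypes)
    {
        for (Map.Entry<String, String> column : columnTypes.entrySet()) {
            if (!isWritableType(column.getValue())) {
                // Same pattern as beginInsert: fail fast with a NOT_SUPPORTED user error
                throw new PrestoException(NOT_SUPPORTED, format("Inserting into Hive table %s.%s with column type %s not supported", database, table, column.getValue()));
            }
        }
    }
}

Because PrestoException is unchecked, a connector can throw it from any SPI method and the engine reports the error code and message to the client.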
use of com.facebook.presto.spi.PrestoException in project presto by prestodb.
In the class HivePartitionManager, method getPartitions:
public HivePartitionResult getPartitions(SemiTransactionalHiveMetastore metastore, ConnectorTableHandle tableHandle, Constraint<ColumnHandle> constraint)
{
    HiveTableHandle hiveTableHandle = (HiveTableHandle) tableHandle;
    TupleDomain<ColumnHandle> effectivePredicate = constraint.getSummary();
    SchemaTableName tableName = hiveTableHandle.getSchemaTableName();
    Table table = getTable(metastore, tableName);
    Optional<HiveBucketHandle> hiveBucketHandle = getHiveBucketHandle(connectorId, table);
    List<HiveColumnHandle> partitionColumns = getPartitionKeyColumnHandles(connectorId, table);
    List<HiveBucket> buckets = getHiveBucketNumbers(table, effectivePredicate);
    TupleDomain<HiveColumnHandle> compactEffectivePredicate = toCompactTupleDomain(effectivePredicate, domainCompactionThreshold);
    if (effectivePredicate.isNone()) {
        return new HivePartitionResult(partitionColumns, ImmutableList.of(), TupleDomain.none(), TupleDomain.none(), hiveBucketHandle);
    }
    if (partitionColumns.isEmpty()) {
        return new HivePartitionResult(partitionColumns, ImmutableList.of(new HivePartition(tableName, compactEffectivePredicate, buckets)), effectivePredicate, TupleDomain.none(), hiveBucketHandle);
    }
    List<Type> partitionTypes = partitionColumns.stream()
            .map(column -> typeManager.getType(column.getTypeSignature()))
            .collect(toList());
    List<String> partitionNames = getFilteredPartitionNames(metastore, tableName, partitionColumns, effectivePredicate);
    // do a final pass to filter based on fields that could not be used to filter the partitions
    int partitionCount = 0;
    ImmutableList.Builder<HivePartition> partitions = ImmutableList.builder();
    for (String partitionName : partitionNames) {
        Optional<Map<ColumnHandle, NullableValue>> values = parseValuesAndFilterPartition(partitionName, partitionColumns, partitionTypes, constraint);
        if (values.isPresent()) {
            if (partitionCount == maxPartitions) {
                throw new PrestoException(HIVE_EXCEEDED_PARTITION_LIMIT, format("Query over table '%s' can potentially read more than %s partitions", hiveTableHandle.getSchemaTableName(), maxPartitions));
            }
            partitionCount++;
            partitions.add(new HivePartition(tableName, compactEffectivePredicate, partitionName, values.get(), buckets));
        }
    }
    // All partition key domains will be fully evaluated, so we don't need to include those
    TupleDomain<ColumnHandle> remainingTupleDomain = TupleDomain.withColumnDomains(Maps.filterKeys(effectivePredicate.getDomains().get(), not(Predicates.in(partitionColumns))));
    TupleDomain<ColumnHandle> enforcedTupleDomain = TupleDomain.withColumnDomains(Maps.filterKeys(effectivePredicate.getDomains().get(), Predicates.in(partitionColumns)));
    return new HivePartitionResult(partitionColumns, partitions.build(), remainingTupleDomain, enforcedTupleDomain, hiveBucketHandle);
}
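Note how the HIVE_EXCEEDED_PARTITION_LIMIT guard works: the counter is compared before it is incremented, so the exception fires on the first match beyond maxPartitions rather than truncating silently. A minimal sketch of the same guard in isolation, assuming presto-spi and presto-hive (for HiveErrorCode) are on the classpath; the method and class names are illustrative:

import com.facebook.presto.spi.PrestoException;

import java.util.ArrayList;
import java.util.List;
import java.util.function.Predicate;

import static com.facebook.presto.hive.HiveErrorCode.HIVE_EXCEEDED_PARTITION_LIMIT;
import static java.lang.String.format;

public final class PartitionLimitSketch
{
    private PartitionLimitSketch() {}

    public static List<String> filterWithLimit(String tableName, List<String> partitionNames, Predicate<String> matches, int maxPartitions)
    {
        List<String> result = new ArrayList<>();
        for (String partitionName : partitionNames) {
            if (matches.test(partitionName)) {
                if (result.size() == maxPartitions) {
                    // fail on the (maxPartitions + 1)-th match instead of returning a partial answer
                    throw new PrestoException(HIVE_EXCEEDED_PARTITION_LIMIT, format("Query over table '%s' can potentially read more than %s partitions", tableName, maxPartitions));
                }
                result.add(partitionName);
            }
        }
        return result;
    }
}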
use of com.facebook.presto.spi.PrestoException in project presto by prestodb.
In the class HivePartitionManager, method getFilteredPartitionNames:
private List<String> getFilteredPartitionNames(SemiTransactionalHiveMetastore metastore, SchemaTableName tableName, List<HiveColumnHandle> partitionKeys, TupleDomain<ColumnHandle> effectivePredicate)
{
    checkArgument(effectivePredicate.getDomains().isPresent());
    List<String> filter = new ArrayList<>();
    for (HiveColumnHandle partitionKey : partitionKeys) {
        Domain domain = effectivePredicate.getDomains().get().get(partitionKey);
        if (domain != null && domain.isNullableSingleValue()) {
            Object value = domain.getNullableSingleValue();
            if (value == null) {
                filter.add(HivePartitionKey.HIVE_DEFAULT_DYNAMIC_PARTITION);
            }
            else if (value instanceof Slice) {
                filter.add(((Slice) value).toStringUtf8());
            }
            else if ((value instanceof Boolean) || (value instanceof Double) || (value instanceof Long)) {
                if (assumeCanonicalPartitionKeys) {
                    filter.add(value.toString());
                }
                else {
                    // Hive treats '0', 'false', and 'False' the same. However, the metastore differentiates between these.
                    filter.add(PARTITION_VALUE_WILDCARD);
                }
            }
            else {
                throw new PrestoException(NOT_SUPPORTED, "Only Boolean, Double and Long partition keys are supported");
            }
        }
        else {
            filter.add(PARTITION_VALUE_WILDCARD);
        }
    }
    // fetch the partition names
    return metastore.getPartitionNamesByParts(tableName.getSchemaName(), tableName.getTableName(), filter).orElseThrow(() -> new TableNotFoundException(tableName));
}
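Each partition key contributes exactly one token to the filter list, so the metastore receives either a concrete value or a wildcard per key. Below is a condensed sketch of that value-to-token mapping, assuming presto-spi on the classpath; String stands in for Slice, and the two constants are assumed values standing in for the real PARTITION_VALUE_WILDCARD and HivePartitionKey.HIVE_DEFAULT_DYNAMIC_PARTITION:

import com.facebook.presto.spi.PrestoException;

import static com.facebook.presto.spi.StandardErrorCode.NOT_SUPPORTED;

public final class PartitionFilterTokenSketch
{
    private PartitionFilterTokenSketch() {}

    // Assumed values for illustration; the real constants live in HivePartitionManager and HivePartitionKey
    private static final String PARTITION_VALUE_WILDCARD = "";
    private static final String HIVE_DEFAULT_DYNAMIC_PARTITION = "__HIVE_DEFAULT_PARTITION__";

    public static String toFilterToken(boolean constrainedToSingleValue, Object value, boolean assumeCanonicalPartitionKeys)
    {
        if (!constrainedToSingleValue) {
            return PARTITION_VALUE_WILDCARD;
        }
        if (value == null) {
            // an explicit IS NULL constraint matches Hive's default partition
            return HIVE_DEFAULT_DYNAMIC_PARTITION;
        }
        if (value instanceof String) {
            return (String) value;
        }
        if (value instanceof Boolean || value instanceof Double || value instanceof Long) {
            // '0', 'false' and 'False' are the same value to Hive, but the metastore
            // matches partition names textually, so only filter when names are canonical
            return assumeCanonicalPartitionKeys ? value.toString() : PARTITION_VALUE_WILDCARD;
        }
        throw new PrestoException(NOT_SUPPORTED, "Only Boolean, Double and Long partition keys are supported");
    }
}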
use of com.facebook.presto.spi.PrestoException in project presto by prestodb.
In the class BackgroundHiveSplitLoader, method listAndSortBucketFiles:
private static List<LocatedFileStatus> listAndSortBucketFiles(HiveFileIterator hiveFileIterator, int bucketCount)
{
    ArrayList<LocatedFileStatus> list = new ArrayList<>(bucketCount);
    while (hiveFileIterator.hasNext()) {
        LocatedFileStatus next = hiveFileIterator.next();
        if (isDirectory(next)) {
            // Fail here to be on the safe side. This seems to be the same as what Hive does
            throw new PrestoException(HIVE_INVALID_BUCKET_FILES, format("%s Found sub-directory in bucket directory for partition: %s", CORRUPT_BUCKETING, hiveFileIterator.getPartitionName()));
        }
        list.add(next);
    }
    if (list.size() != bucketCount) {
        throw new PrestoException(HIVE_INVALID_BUCKET_FILES, format("%s The number of files in the directory (%s) does not match the declared bucket count (%s) for partition: %s", CORRUPT_BUCKETING, list.size(), bucketCount, hiveFileIterator.getPartitionName()));
    }
    // Sort FileStatus objects (instead of, e.g., fileStatus.getPath().toString). This matches org.apache.hadoop.hive.ql.metadata.Table.getSortedPaths
    list.sort(null);
    return list;
}
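Two invariants are enforced here: the bucket directory contains no sub-directories, and the file count equals the declared bucket count. The final list.sort(null) sorts by the elements' natural ordering, which for FileStatus compares paths, so index n ends up corresponding to bucket n. A sketch of the same validation over java.io.File instead of Hadoop's LocatedFileStatus, assuming presto-spi and presto-hive on the classpath (the CORRUPT_BUCKETING message prefix is omitted):

import com.facebook.presto.spi.PrestoException;

import java.io.File;
import java.util.ArrayList;
import java.util.List;

import static com.facebook.presto.hive.HiveErrorCode.HIVE_INVALID_BUCKET_FILES;
import static java.lang.String.format;

public final class BucketFileValidationSketch
{
    private BucketFileValidationSketch() {}

    public static List<File> listAndSortBucketFiles(File bucketDirectory, int bucketCount, String partitionName)
    {
        File[] entries = bucketDirectory.listFiles();
        List<File> files = new ArrayList<>(bucketCount);
        for (File entry : (entries == null) ? new File[0] : entries) {
            if (entry.isDirectory()) {
                // fail on the safe side, mirroring the Presto check above
                throw new PrestoException(HIVE_INVALID_BUCKET_FILES, format("Found sub-directory in bucket directory for partition: %s", partitionName));
            }
            files.add(entry);
        }
        if (files.size() != bucketCount) {
            throw new PrestoException(HIVE_INVALID_BUCKET_FILES, format("The number of files in the directory (%s) does not match the declared bucket count (%s) for partition: %s", files.size(), bucketCount, partitionName));
        }
        // natural ordering of File compares path names, so index n corresponds to bucket n
        files.sort(null);
        return files;
    }
}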
use of com.facebook.presto.spi.PrestoException in project presto by prestodb.
In the class BackgroundHiveSplitLoader, method loadPartition:
private void loadPartition(HivePartitionMetadata partition)
        throws IOException
{
    String partitionName = partition.getHivePartition().getPartitionId();
    Properties schema = getPartitionSchema(table, partition.getPartition());
    List<HivePartitionKey> partitionKeys = getPartitionKeys(table, partition.getPartition());
    TupleDomain<HiveColumnHandle> effectivePredicate = partition.getHivePartition().getEffectivePredicate();
    Path path = new Path(getPartitionLocation(table, partition.getPartition()));
    Configuration configuration = hdfsEnvironment.getConfiguration(path);
    InputFormat<?, ?> inputFormat = getInputFormat(configuration, schema, false);
    FileSystem fs = hdfsEnvironment.getFileSystem(session.getUser(), path);
    if (inputFormat instanceof SymlinkTextInputFormat) {
        if (bucketHandle.isPresent()) {
            throw new PrestoException(StandardErrorCode.NOT_SUPPORTED, "Bucketed table in SymlinkTextInputFormat is not yet supported");
        }
        // TODO: This should use an iterator like the HiveFileIterator
        for (Path targetPath : getTargetPathsFromSymlink(fs, path)) {
            // The input should be in TextInputFormat.
            TextInputFormat targetInputFormat = new TextInputFormat();
            // get the configuration for the target path -- it may be a different hdfs instance
            Configuration targetConfiguration = hdfsEnvironment.getConfiguration(targetPath);
            JobConf targetJob = new JobConf(targetConfiguration);
            targetJob.setInputFormat(TextInputFormat.class);
            targetInputFormat.configure(targetJob);
            FileInputFormat.setInputPaths(targetJob, targetPath);
            InputSplit[] targetSplits = targetInputFormat.getSplits(targetJob, 0);
            if (addSplitsToSource(targetSplits, partitionName, partitionKeys, schema, effectivePredicate, partition.getColumnCoercions())) {
                return;
            }
        }
        return;
    }
    // Some input formats compute their own splits, so defer to getSplits()
    // on the input format to obtain file splits.
    if (shouldUseFileSplitsFromInputFormat(inputFormat)) {
        JobConf jobConf = new JobConf(configuration);
        FileInputFormat.setInputPaths(jobConf, path);
        InputSplit[] splits = inputFormat.getSplits(jobConf, 0);
        addSplitsToSource(splits, partitionName, partitionKeys, schema, effectivePredicate, partition.getColumnCoercions());
        return;
    }
    // If only one bucket could match: load that one file
    HiveFileIterator iterator = new HiveFileIterator(path, fs, directoryLister, namenodeStats, partitionName, inputFormat, schema, partitionKeys, effectivePredicate, partition.getColumnCoercions());
    if (!buckets.isEmpty()) {
        int bucketCount = buckets.get(0).getBucketCount();
        List<LocatedFileStatus> list = listAndSortBucketFiles(iterator, bucketCount);
        List<Iterator<HiveSplit>> iteratorList = new ArrayList<>();
        for (HiveBucket bucket : buckets) {
            int bucketNumber = bucket.getBucketNumber();
            LocatedFileStatus file = list.get(bucketNumber);
            boolean splittable = isSplittable(iterator.getInputFormat(), hdfsEnvironment.getFileSystem(session.getUser(), file.getPath()), file.getPath());
            iteratorList.add(createHiveSplitIterator(iterator.getPartitionName(), file.getPath().toString(), file.getBlockLocations(), 0, file.getLen(), iterator.getSchema(), iterator.getPartitionKeys(), splittable, session, OptionalInt.of(bucketNumber), effectivePredicate, partition.getColumnCoercions()));
        }
        addToHiveSplitSourceRoundRobin(iteratorList);
        return;
    }
    // If table is bucketed: list the directory, sort, tag with bucket id
    if (bucketHandle.isPresent()) {
        // HiveFileIterator skips hidden files automatically.
        int bucketCount = bucketHandle.get().getBucketCount();
        List<LocatedFileStatus> list = listAndSortBucketFiles(iterator, bucketCount);
        List<Iterator<HiveSplit>> iteratorList = new ArrayList<>();
        for (int bucketIndex = 0; bucketIndex < bucketCount; bucketIndex++) {
            LocatedFileStatus file = list.get(bucketIndex);
            boolean splittable = isSplittable(iterator.getInputFormat(), hdfsEnvironment.getFileSystem(session.getUser(), file.getPath()), file.getPath());
            iteratorList.add(createHiveSplitIterator(iterator.getPartitionName(), file.getPath().toString(), file.getBlockLocations(), 0, file.getLen(), iterator.getSchema(), iterator.getPartitionKeys(), splittable, session, OptionalInt.of(bucketIndex), iterator.getEffectivePredicate(), partition.getColumnCoercions()));
        }
        addToHiveSplitSourceRoundRobin(iteratorList);
        return;
    }
    fileIterators.addLast(iterator);
}
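Across all of these call sites the pattern is the same: PrestoException is an unchecked exception carrying an ErrorCodeSupplier, so it propagates out of the connector to the engine, which reports the code and message to the client. A minimal sketch of what a caller sees, assuming presto-spi on the classpath:

import com.facebook.presto.spi.PrestoException;

import static com.facebook.presto.spi.StandardErrorCode.NOT_SUPPORTED;

public final class PrestoExceptionDemo
{
    private PrestoExceptionDemo() {}

    public static void main(String[] args)
    {
        try {
            throw new PrestoException(NOT_SUPPORTED, "Bucketed table in SymlinkTextInputFormat is not yet supported");
        }
        catch (PrestoException e) {
            // ErrorCode exposes a stable numeric code and name for clients and logs
            System.out.printf("%s (%d): %s%n", e.getErrorCode().getName(), e.getErrorCode().getCode(), e.getMessage());
        }
    }
}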