use of com.facebook.presto.hive.metastore.Partition in project presto by prestodb.
the class AbstractTestHiveClient method doInsertIntoNewPartition.
private void doInsertIntoNewPartition(HiveStorageFormat storageFormat, SchemaTableName tableName) throws Exception {
// creating the table
doCreateEmptyTable(tableName, storageFormat, CREATE_TABLE_COLUMNS_PARTITIONED);
// insert the data
String queryId = insertData(tableName, CREATE_TABLE_PARTITIONED_DATA);
Set<String> existingFiles;
try (Transaction transaction = newTransaction()) {
// verify partitions were created
List<String> partitionNames = transaction.getMetastore(tableName.getSchemaName()).getPartitionNames(tableName.getSchemaName(), tableName.getTableName()).orElseThrow(() -> new PrestoException(HIVE_METASTORE_ERROR, "Partition metadata not available"));
assertEqualsIgnoreOrder(partitionNames, CREATE_TABLE_PARTITIONED_DATA.getMaterializedRows().stream().map(row -> "ds=" + row.getField(CREATE_TABLE_PARTITIONED_DATA.getTypes().size() - 1)).collect(toList()));
// verify the node versions in partitions
Map<String, Optional<Partition>> partitions = getMetastoreClient(tableName.getSchemaName()).getPartitionsByNames(tableName.getSchemaName(), tableName.getTableName(), partitionNames);
assertEquals(partitions.size(), partitionNames.size());
for (String partitionName : partitionNames) {
Partition partition = partitions.get(partitionName).get();
assertEquals(partition.getParameters().get(HiveMetadata.PRESTO_VERSION_NAME), TEST_SERVER_VERSION);
assertEquals(partition.getParameters().get(HiveMetadata.PRESTO_QUERY_ID_NAME), queryId);
}
// load the new table
ConnectorSession session = newSession();
ConnectorMetadata metadata = transaction.getMetadata();
ConnectorTableHandle tableHandle = getTableHandle(metadata, tableName);
List<ColumnHandle> columnHandles = filterNonHiddenColumnHandles(metadata.getColumnHandles(session, tableHandle).values());
// verify the data
MaterializedResult result = readTable(transaction, tableHandle, columnHandles, session, TupleDomain.all(), OptionalInt.empty(), Optional.of(storageFormat));
assertEqualsIgnoreOrder(result.getMaterializedRows(), CREATE_TABLE_PARTITIONED_DATA.getMaterializedRows());
// test rollback
existingFiles = listAllDataFiles(transaction, tableName.getSchemaName(), tableName.getTableName());
assertFalse(existingFiles.isEmpty());
}
Path stagingPathRoot;
try (Transaction transaction = newTransaction()) {
ConnectorSession session = newSession();
ConnectorMetadata metadata = transaction.getMetadata();
ConnectorTableHandle tableHandle = getTableHandle(metadata, tableName);
// "stage" insert data
ConnectorInsertTableHandle insertTableHandle = metadata.beginInsert(session, tableHandle);
stagingPathRoot = getStagingPathRoot(insertTableHandle);
ConnectorPageSink sink = pageSinkProvider.createPageSink(transaction.getTransactionHandle(), session, insertTableHandle);
sink.appendPage(CREATE_TABLE_PARTITIONED_DATA_2ND.toPage());
getFutureValue(sink.finish());
// verify we did not modify the table directory
assertEquals(listAllDataFiles(transaction, tableName.getSchemaName(), tableName.getTableName()), existingFiles);
// verify all temp files start with the unique prefix
Set<String> tempFiles = listAllDataFiles(getStagingPathRoot(insertTableHandle));
assertTrue(!tempFiles.isEmpty());
for (String filePath : tempFiles) {
assertTrue(new Path(filePath).getName().startsWith(getFilePrefix(insertTableHandle)));
}
// rollback insert
transaction.rollback();
}
// verify the data is unchanged
try (Transaction transaction = newTransaction()) {
ConnectorSession session = newSession();
ConnectorMetadata metadata = transaction.getMetadata();
ConnectorTableHandle tableHandle = getTableHandle(metadata, tableName);
List<ColumnHandle> columnHandles = filterNonHiddenColumnHandles(metadata.getColumnHandles(session, tableHandle).values());
MaterializedResult result = readTable(transaction, tableHandle, columnHandles, newSession(), TupleDomain.all(), OptionalInt.empty(), Optional.empty());
assertEqualsIgnoreOrder(result.getMaterializedRows(), CREATE_TABLE_PARTITIONED_DATA.getMaterializedRows());
// verify we did not modify the table directory
assertEquals(listAllDataFiles(transaction, tableName.getSchemaName(), tableName.getTableName()), existingFiles);
// verify temp directory is empty
assertTrue(listAllDataFiles(stagingPathRoot).isEmpty());
}
}
use of com.facebook.presto.hive.metastore.Partition in project presto by prestodb.
the class HiveMetadata method finishCreateTable.
@Override
public Optional<ConnectorOutputMetadata> finishCreateTable(ConnectorSession session, ConnectorOutputTableHandle tableHandle, Collection<Slice> fragments) {
HiveOutputTableHandle handle = (HiveOutputTableHandle) tableHandle;
List<PartitionUpdate> partitionUpdates = fragments.stream().map(Slice::getBytes).map(partitionUpdateCodec::fromJson).collect(toList());
Path targetPath = locationService.targetPathRoot(handle.getLocationHandle());
Path writePath = locationService.writePathRoot(handle.getLocationHandle()).get();
Table table = buildTableObject(session.getQueryId(), handle.getSchemaName(), handle.getTableName(), handle.getTableOwner(), handle.getInputColumns(), handle.getTableStorageFormat(), handle.getPartitionedBy(), handle.getBucketProperty(), handle.getAdditionalTableParameters(), targetPath, false, prestoVersion);
PrincipalPrivileges principalPrivileges = buildInitialPrivilegeSet(handle.getTableOwner());
partitionUpdates = PartitionUpdate.mergePartitionUpdates(partitionUpdates);
if (handle.getBucketProperty().isPresent()) {
ImmutableList<PartitionUpdate> partitionUpdatesForMissingBuckets = computePartitionUpdatesForMissingBuckets(handle, table, partitionUpdates);
// replace partitionUpdates before creating the empty files so that those files will be cleaned up if we end up rollback
partitionUpdates = PartitionUpdate.mergePartitionUpdates(Iterables.concat(partitionUpdates, partitionUpdatesForMissingBuckets));
for (PartitionUpdate partitionUpdate : partitionUpdatesForMissingBuckets) {
Optional<Partition> partition = table.getPartitionColumns().isEmpty() ? Optional.empty() : Optional.of(buildPartitionObject(session.getQueryId(), table, partitionUpdate));
createEmptyFile(partitionUpdate.getWritePath(), table, partition, partitionUpdate.getFileNames());
}
}
metastore.createTable(session, table, principalPrivileges, Optional.of(writePath));
if (!handle.getPartitionedBy().isEmpty()) {
if (respectTableFormat) {
Verify.verify(handle.getPartitionStorageFormat() == handle.getTableStorageFormat());
}
partitionUpdates.forEach(partitionUpdate -> metastore.addPartition(session, handle.getSchemaName(), handle.getTableName(), buildPartitionObject(session.getQueryId(), table, partitionUpdate), partitionUpdate.getWritePath()));
}
return Optional.of(new HiveWrittenPartitions(partitionUpdates.stream().map(PartitionUpdate::getName).collect(Collectors.toList())));
}
use of com.facebook.presto.hive.metastore.Partition in project presto by prestodb.
the class ManifestPartitionLoader method createInternalHiveSplitFactory.
private InternalHiveSplitFactory createInternalHiveSplitFactory(Table table, HivePartitionMetadata partition, ConnectorSession session, Optional<Domain> pathDomain, HdfsEnvironment hdfsEnvironment, HdfsContext hdfsContext, boolean schedulerUsesHostAddresses) throws IOException {
String partitionName = partition.getHivePartition().getPartitionId();
Storage storage = partition.getPartition().map(Partition::getStorage).orElse(table.getStorage());
String inputFormatName = storage.getStorageFormat().getInputFormat();
int partitionDataColumnCount = partition.getPartition().map(p -> p.getColumns().size()).orElse(table.getDataColumns().size());
List<HivePartitionKey> partitionKeys = getPartitionKeys(table, partition.getPartition(), partitionName);
Path path = new Path(getPartitionLocation(table, partition.getPartition()));
Configuration configuration = hdfsEnvironment.getConfiguration(hdfsContext, path);
InputFormat<?, ?> inputFormat = getInputFormat(configuration, inputFormatName, false);
ExtendedFileSystem fileSystem = hdfsEnvironment.getFileSystem(hdfsContext, path);
return new InternalHiveSplitFactory(fileSystem, inputFormat, pathDomain, getNodeSelectionStrategy(session), getMaxInitialSplitSize(session), false, new HiveSplitPartitionInfo(storage, path.toUri(), partitionKeys, partitionName, partitionDataColumnCount, partition.getTableToPartitionMapping(), Optional.empty(), partition.getRedundantColumnDomains()), schedulerUsesHostAddresses, partition.getEncryptionInformation());
}
use of com.facebook.presto.hive.metastore.Partition in project presto by prestodb.
the class StoragePartitionLoader method getBucketedSplits.
private List<InternalHiveSplit> getBucketedSplits(Path path, ExtendedFileSystem fileSystem, InternalHiveSplitFactory splitFactory, BucketSplitInfo bucketSplitInfo, Optional<HiveSplit.BucketConversion> bucketConversion, String partitionName, boolean splittable, PathFilter pathFilter) {
int readBucketCount = bucketSplitInfo.getReadBucketCount();
int tableBucketCount = bucketSplitInfo.getTableBucketCount();
int partitionBucketCount = bucketConversion.map(HiveSplit.BucketConversion::getPartitionBucketCount).orElse(tableBucketCount);
int bucketCount = max(readBucketCount, partitionBucketCount);
checkState(readBucketCount <= tableBucketCount, "readBucketCount(%s) should be less than or equal to tableBucketCount(%s)", readBucketCount, tableBucketCount);
// list all files in the partition
List<HiveFileInfo> fileInfos = new ArrayList<>(partitionBucketCount);
try {
Iterators.addAll(fileInfos, directoryLister.list(fileSystem, table, path, namenodeStats, pathFilter, new HiveDirectoryContext(FAIL, isUseListDirectoryCache(session))));
} catch (HiveFileIterator.NestedDirectoryNotAllowedException e) {
// Fail here to be on the safe side. This seems to be the same as what Hive does
throw new PrestoException(HIVE_INVALID_BUCKET_FILES, format("Hive table '%s' is corrupt. Found sub-directory in bucket directory for partition: %s", table.getSchemaTableName(), partitionName));
}
ListMultimap<Integer, HiveFileInfo> bucketToFileInfo = ArrayListMultimap.create();
if (!shouldCreateFilesForMissingBuckets(table, session)) {
fileInfos.stream().forEach(fileInfo -> {
String fileName = fileInfo.getPath().getName();
OptionalInt bucket = getBucketNumber(fileName);
if (bucket.isPresent()) {
bucketToFileInfo.put(bucket.getAsInt(), fileInfo);
} else {
throw new PrestoException(HIVE_INVALID_BUCKET_FILES, format("invalid hive bucket file name: %s", fileName));
}
});
} else {
// build mapping of file name to bucket
for (HiveFileInfo file : fileInfos) {
String fileName = file.getPath().getName();
OptionalInt bucket = getBucketNumber(fileName);
if (bucket.isPresent()) {
bucketToFileInfo.put(bucket.getAsInt(), file);
continue;
}
// legacy mode requires exactly one file per bucket
if (fileInfos.size() != partitionBucketCount) {
throw new PrestoException(HIVE_INVALID_BUCKET_FILES, format("Hive table '%s' is corrupt. File '%s' does not match the standard naming pattern, and the number " + "of files in the directory (%s) does not match the declared bucket count (%s) for partition: %s", table.getSchemaTableName(), fileName, fileInfos.size(), partitionBucketCount, partitionName));
}
if (fileInfos.get(0).getPath().getName().matches("\\d+")) {
try {
// File names are integer if they are created when file_renaming_enabled is set to true
fileInfos.sort(Comparator.comparingInt(fileInfo -> Integer.parseInt(fileInfo.getPath().getName())));
} catch (NumberFormatException e) {
throw new PrestoException(HIVE_INVALID_FILE_NAMES, format("Hive table '%s' is corrupt. Some of the filenames in the partition: %s are not integers", new SchemaTableName(table.getDatabaseName(), table.getTableName()), partitionName));
}
} else {
// Sort FileStatus objects (instead of, e.g., fileStatus.getPath().toString). This matches org.apache.hadoop.hive.ql.metadata.Table.getSortedPaths
fileInfos.sort(null);
}
// Use position in sorted list as the bucket number
bucketToFileInfo.clear();
for (int i = 0; i < fileInfos.size(); i++) {
bucketToFileInfo.put(i, fileInfos.get(i));
}
break;
}
}
// convert files internal splits
List<InternalHiveSplit> splitList = new ArrayList<>();
for (int bucketNumber = 0; bucketNumber < bucketCount; bucketNumber++) {
// Physical bucket #. This determine file name. It also determines the order of splits in the result.
int partitionBucketNumber = bucketNumber % partitionBucketCount;
if (!bucketToFileInfo.containsKey(partitionBucketNumber)) {
continue;
}
// Logical bucket #. Each logical bucket corresponds to a "bucket" from engine's perspective.
int readBucketNumber = bucketNumber % readBucketCount;
boolean containsIneligibleTableBucket = false;
List<Integer> eligibleTableBucketNumbers = new ArrayList<>();
for (int tableBucketNumber = bucketNumber % tableBucketCount; tableBucketNumber < tableBucketCount; tableBucketNumber += bucketCount) {
// table bucket number: this is used for evaluating "$bucket" filters.
if (bucketSplitInfo.isTableBucketEnabled(tableBucketNumber)) {
eligibleTableBucketNumbers.add(tableBucketNumber);
} else {
containsIneligibleTableBucket = true;
}
}
if (!eligibleTableBucketNumbers.isEmpty() && containsIneligibleTableBucket) {
throw new PrestoException(NOT_SUPPORTED, "The bucket filter cannot be satisfied. There are restrictions on the bucket filter when all the following is true: " + "1. a table has a different buckets count as at least one of its partitions that is read in this query; " + "2. the table has a different but compatible bucket number with another table in the query; " + "3. some buckets of the table is filtered out from the query, most likely using a filter on \"$bucket\". " + "(table name: " + table.getTableName() + ", table bucket count: " + tableBucketCount + ", " + "partition bucket count: " + partitionBucketCount + ", effective reading bucket count: " + readBucketCount + ")");
}
if (!eligibleTableBucketNumbers.isEmpty()) {
for (HiveFileInfo fileInfo : bucketToFileInfo.get(partitionBucketNumber)) {
eligibleTableBucketNumbers.stream().map(tableBucketNumber -> splitFactory.createInternalHiveSplit(fileInfo, readBucketNumber, tableBucketNumber, splittable)).forEach(optionalSplit -> optionalSplit.ifPresent(splitList::add));
}
}
}
return splitList;
}
use of com.facebook.presto.hive.metastore.Partition in project presto by prestodb.
the class AbstractTestHiveClient method testStorePartitionWithStatistics.
protected void testStorePartitionWithStatistics(List<ColumnMetadata> columns, PartitionStatistics statsForAllColumns1, PartitionStatistics statsForAllColumns2, PartitionStatistics statsForSubsetOfColumns, PartitionStatistics emptyStatistics, Duration delayBetweenAlters) throws Exception {
SchemaTableName tableName = temporaryTable("store_partition_with_statistics");
try {
doCreateEmptyTable(tableName, ORC, columns);
ExtendedHiveMetastore metastoreClient = getMetastoreClient();
Table table = metastoreClient.getTable(METASTORE_CONTEXT, tableName.getSchemaName(), tableName.getTableName()).orElseThrow(() -> new TableNotFoundException(tableName));
List<String> partitionValues = ImmutableList.of("2016-01-01");
String partitionName = makePartName(ImmutableList.of("ds"), partitionValues);
Partition partition = createDummyPartition(table, partitionName);
// create partition with stats for all columns
metastoreClient.addPartitions(METASTORE_CONTEXT, tableName.getSchemaName(), tableName.getTableName(), ImmutableList.of(new PartitionWithStatistics(partition, partitionName, statsForAllColumns1)));
assertEquals(metastoreClient.getPartition(METASTORE_CONTEXT, tableName.getSchemaName(), tableName.getTableName(), partitionValues).get().getStorage().getStorageFormat(), fromHiveStorageFormat(ORC));
assertThat(metastoreClient.getPartitionStatistics(METASTORE_CONTEXT, tableName.getSchemaName(), tableName.getTableName(), ImmutableSet.of(partitionName))).isEqualTo(ImmutableMap.of(partitionName, statsForAllColumns1));
sleep(delayBetweenAlters.toMillis());
// alter the partition into one with other stats
Partition modifiedPartition = Partition.builder(partition).withStorage(storage -> storage.setStorageFormat(fromHiveStorageFormat(DWRF)).setLocation(partitionTargetPath(tableName, partitionName))).build();
metastoreClient.alterPartition(METASTORE_CONTEXT, tableName.getSchemaName(), tableName.getTableName(), new PartitionWithStatistics(modifiedPartition, partitionName, statsForAllColumns2));
assertEquals(metastoreClient.getPartition(METASTORE_CONTEXT, tableName.getSchemaName(), tableName.getTableName(), partitionValues).get().getStorage().getStorageFormat(), fromHiveStorageFormat(DWRF));
assertThat(metastoreClient.getPartitionStatistics(METASTORE_CONTEXT, tableName.getSchemaName(), tableName.getTableName(), ImmutableSet.of(partitionName))).isEqualTo(ImmutableMap.of(partitionName, statsForAllColumns2));
sleep(delayBetweenAlters.toMillis());
// alter the partition into one with stats for only subset of columns
modifiedPartition = Partition.builder(partition).withStorage(storage -> storage.setStorageFormat(fromHiveStorageFormat(TEXTFILE)).setLocation(partitionTargetPath(tableName, partitionName))).build();
metastoreClient.alterPartition(METASTORE_CONTEXT, tableName.getSchemaName(), tableName.getTableName(), new PartitionWithStatistics(modifiedPartition, partitionName, statsForSubsetOfColumns));
assertThat(metastoreClient.getPartitionStatistics(METASTORE_CONTEXT, tableName.getSchemaName(), tableName.getTableName(), ImmutableSet.of(partitionName))).isEqualTo(ImmutableMap.of(partitionName, statsForSubsetOfColumns));
sleep(delayBetweenAlters.toMillis());
// alter the partition into one without stats
modifiedPartition = Partition.builder(partition).withStorage(storage -> storage.setStorageFormat(fromHiveStorageFormat(TEXTFILE)).setLocation(partitionTargetPath(tableName, partitionName))).build();
metastoreClient.alterPartition(METASTORE_CONTEXT, tableName.getSchemaName(), tableName.getTableName(), new PartitionWithStatistics(modifiedPartition, partitionName, emptyStatistics));
assertThat(metastoreClient.getPartitionStatistics(METASTORE_CONTEXT, tableName.getSchemaName(), tableName.getTableName(), ImmutableSet.of(partitionName))).isEqualTo(ImmutableMap.of(partitionName, emptyStatistics));
} finally {
dropTable(tableName);
}
}
Aggregations