use of io.trino.plugin.hive.metastore.Partition in project trino by trinodb.
the class TestProtoUtils method testPartition.
/**
 * Checks that {@code ProtoUtils.fromProto} copies the columns, database name, parameters,
 * values, table name and storage attributes of an Alluxio {@code PartitionInfo} into the
 * resulting {@link Partition}.
 */
@Test
public void testPartition() {
    // Build the proto exactly once: every expectation below is read from the very message
    // that was converted, instead of from a second, independently created fixture.
    alluxio.grpc.table.layout.hive.PartitionInfo partitionInfo = TestingAlluxioMetastoreObjects.getTestingPartitionInfo().build();
    Partition partition = ProtoUtils.fromProto(partitionInfo);

    // partition-level attributes
    assertEquals(partitionInfo.getDataColsList().stream().map(ProtoUtils::fromProto).collect(Collectors.toList()), partition.getColumns());
    assertEquals(partitionInfo.getDbName(), partition.getDatabaseName());
    assertEquals(partitionInfo.getParametersMap(), partition.getParameters());
    assertEquals(partitionInfo.getValuesList(), partition.getValues());
    assertEquals(partitionInfo.getTableName(), partition.getTableName());

    // storage-level attributes
    Storage s = partition.getStorage();
    alluxio.grpc.table.layout.hive.Storage storage = partitionInfo.getStorage();
    assertEquals(storage.getSkewed(), s.isSkewed());
    assertEquals(ProtoUtils.fromProto(storage.getStorageFormat()), s.getStorageFormat());
    assertEquals(storage.getLocation(), s.getLocation());
    // the bucket property conversion also takes the partition parameters as input
    assertEquals(ProtoUtils.fromProto(partitionInfo.getParametersMap(), storage.getBucketProperty()), s.getBucketProperty());
    assertEquals(storage.getStorageFormat().getSerdelibParametersMap(), s.getSerdeParameters());
}
use of io.trino.plugin.hive.metastore.Partition in project trino by trinodb.
the class AbstractTestHive method listAllDataPaths.
/**
 * Lists the storage locations that hold data for the given table: the table's own location
 * (when it has one) plus the location of every partition that is not nested under it.
 *
 * @param metastore metastore used to look up the table and its partitions
 * @param schemaName schema of the table
 * @param tableName name of the table
 * @return the collected locations, table location first
 */
public static List<String> listAllDataPaths(SemiTransactionalHiveMetastore metastore, String schemaName, String tableName) {
    ImmutableList.Builder<String> locations = ImmutableList.builder();
    Table table = metastore.getTable(schemaName, tableName).get();
    String tableLocation = table.getStorage().getLocation();
    if (tableLocation != null) {
        // For partitioned table, there should be nothing directly under this directory.
        // But including this location in the set makes the directory content assert more
        // extensive, which is desirable.
        locations.add(tableLocation);
    }
    Optional<List<String>> partitionNames = metastore.getPartitionNames(schemaName, tableName);
    if (partitionNames.isPresent()) {
        metastore.getPartitionsByNames(schemaName, tableName, partitionNames.get()).values().stream()
                .map(Optional::get)
                .map(partition -> partition.getStorage().getLocation())
                // Partitions under the table location are already covered by it. When the table
                // has no location, keep every partition; String.startsWith(null) would throw.
                .filter(location -> tableLocation == null || !location.startsWith(tableLocation))
                .forEach(locations::add);
    }
    return locations.build();
}
use of io.trino.plugin.hive.metastore.Partition in project trino by trinodb.
the class AbstractTestHive method testStorePartitionWithStatistics.
/**
 * Adds a partition with statistics to a temporary table and then alters it three times,
 * checking after each step that the metastore returns the statistics that were just stored.
 * The temporary table is dropped when the test finishes, whether it passed or failed.
 *
 * @param columns columns of the temporary table
 * @param statsForAllColumns1 statistics stored when the partition is first added
 * @param statsForAllColumns2 statistics stored by the first alter
 * @param statsForSubsetOfColumns statistics stored by the second alter
 * @param emptyStatistics statistics stored by the final alter
 */
protected void testStorePartitionWithStatistics(List<ColumnMetadata> columns, PartitionStatistics statsForAllColumns1, PartitionStatistics statsForAllColumns2, PartitionStatistics statsForSubsetOfColumns, PartitionStatistics emptyStatistics) throws Exception {
    SchemaTableName tableName = temporaryTable("store_partition_with_statistics");
    try {
        doCreateEmptyTable(tableName, ORC, columns);

        HiveMetastoreClosure metastore = new HiveMetastoreClosure(getMetastoreClient());
        String schema = tableName.getSchemaName();
        String name = tableName.getTableName();
        Table table = metastore.getTable(schema, name)
                .orElseThrow(() -> new TableNotFoundException(tableName));

        List<String> partitionValues = ImmutableList.of("2016-01-01");
        String partitionName = makePartName(ImmutableList.of("ds"), partitionValues);
        Partition original = createDummyPartition(table, partitionName);

        // step 1: add the partition together with statistics for all columns
        metastore.addPartitions(schema, name, ImmutableList.of(new PartitionWithStatistics(original, partitionName, statsForAllColumns1)));
        assertEquals(
                metastore.getPartition(schema, name, partitionValues).get().getStorage().getStorageFormat(),
                fromHiveStorageFormat(ORC));
        assertThat(metastore.getPartitionStatistics(schema, name, ImmutableSet.of(partitionName)))
                .isEqualTo(ImmutableMap.of(partitionName, statsForAllColumns1));

        // step 2: switch storage format and location, store a different full set of statistics
        Partition altered = Partition.builder(original)
                .withStorage(storage -> storage
                        .setStorageFormat(fromHiveStorageFormat(RCBINARY))
                        .setLocation(partitionTargetPath(tableName, partitionName)))
                .build();
        metastore.alterPartition(schema, name, new PartitionWithStatistics(altered, partitionName, statsForAllColumns2));
        assertEquals(
                metastore.getPartition(schema, name, partitionValues).get().getStorage().getStorageFormat(),
                fromHiveStorageFormat(RCBINARY));
        assertThat(metastore.getPartitionStatistics(schema, name, ImmutableSet.of(partitionName)))
                .isEqualTo(ImmutableMap.of(partitionName, statsForAllColumns2));

        // step 3: alter again, this time with statistics for only a subset of columns
        altered = Partition.builder(original)
                .withStorage(storage -> storage
                        .setStorageFormat(fromHiveStorageFormat(TEXTFILE))
                        .setLocation(partitionTargetPath(tableName, partitionName)))
                .build();
        metastore.alterPartition(schema, name, new PartitionWithStatistics(altered, partitionName, statsForSubsetOfColumns));
        assertThat(metastore.getPartitionStatistics(schema, name, ImmutableSet.of(partitionName)))
                .isEqualTo(ImmutableMap.of(partitionName, statsForSubsetOfColumns));

        // step 4: alter once more, storing the empty statistics
        altered = Partition.builder(original)
                .withStorage(storage -> storage
                        .setStorageFormat(fromHiveStorageFormat(TEXTFILE))
                        .setLocation(partitionTargetPath(tableName, partitionName)))
                .build();
        metastore.alterPartition(schema, name, new PartitionWithStatistics(altered, partitionName, emptyStatistics));
        assertThat(metastore.getPartitionStatistics(schema, name, ImmutableSet.of(partitionName)))
                .isEqualTo(ImmutableMap.of(partitionName, emptyStatistics));
    }
    finally {
        dropTable(tableName);
    }
}
use of io.trino.plugin.hive.metastore.Partition in project trino by trinodb.
the class AbstractTestHive method doInsertIntoNewPartition.
/**
 * Creates an empty partitioned table, inserts {@code CREATE_TABLE_PARTITIONED_DATA} into it and
 * verifies the resulting partitions, data and basic statistics. It then stages a second insert,
 * rolls it back, and verifies that the table data and files are unchanged and that the staging
 * directory is empty.
 *
 * @param storageFormat storage format used to create the table and to read it back
 * @param tableName name of the table to create
 */
private void doInsertIntoNewPartition(HiveStorageFormat storageFormat, SchemaTableName tableName) throws Exception {
    // creating the table
    doCreateEmptyTable(tableName, storageFormat, CREATE_TABLE_COLUMNS_PARTITIONED);

    // insert the data
    String queryId = insertData(tableName, CREATE_TABLE_PARTITIONED_DATA);

    Set<String> existingFiles;
    try (Transaction transaction = newTransaction()) {
        // verify partitions were created
        Table table = metastoreClient.getTable(tableName.getSchemaName(), tableName.getTableName()).orElseThrow(() -> new TableNotFoundException(tableName));
        List<String> partitionNames = transaction.getMetastore().getPartitionNames(tableName.getSchemaName(), tableName.getTableName()).orElseThrow(() -> new AssertionError("Table does not exist: " + tableName));
        // the expected partition name is "ds=" followed by the last field of each inserted row
        assertEqualsIgnoreOrder(partitionNames, CREATE_TABLE_PARTITIONED_DATA.getMaterializedRows().stream().map(row -> "ds=" + row.getField(CREATE_TABLE_PARTITIONED_DATA.getTypes().size() - 1)).collect(toImmutableList()));

        // verify the node versions in partitions
        Map<String, Optional<Partition>> partitions = getMetastoreClient().getPartitionsByNames(table, partitionNames);
        assertEquals(partitions.size(), partitionNames.size());
        for (String partitionName : partitionNames) {
            Partition partition = partitions.get(partitionName).get();
            assertEquals(partition.getParameters().get(PRESTO_VERSION_NAME), TEST_SERVER_VERSION);
            assertEquals(partition.getParameters().get(PRESTO_QUERY_ID_NAME), queryId);
        }

        // load the new table
        ConnectorSession session = newSession();
        ConnectorMetadata metadata = transaction.getMetadata();
        metadata.beginQuery(session);
        ConnectorTableHandle tableHandle = getTableHandle(metadata, tableName);
        List<ColumnHandle> columnHandles = filterNonHiddenColumnHandles(metadata.getColumnHandles(session, tableHandle).values());

        // verify the data
        MaterializedResult result = readTable(transaction, tableHandle, columnHandles, session, TupleDomain.all(), OptionalInt.empty(), Optional.of(storageFormat));
        assertEqualsIgnoreOrder(result.getMaterializedRows(), CREATE_TABLE_PARTITIONED_DATA.getMaterializedRows());

        // remember the current data files; they are compared again after the rollback below
        existingFiles = listAllDataFiles(transaction, tableName.getSchemaName(), tableName.getTableName());
        assertFalse(existingFiles.isEmpty());

        // test statistics
        for (String partitionName : partitionNames) {
            HiveBasicStatistics partitionStatistics = getBasicStatisticsForPartition(transaction, tableName, partitionName);
            assertEquals(partitionStatistics.getRowCount().getAsLong(), 1L);
            assertEquals(partitionStatistics.getFileCount().getAsLong(), 1L);
            assertGreaterThan(partitionStatistics.getInMemoryDataSizeInBytes().getAsLong(), 0L);
            assertGreaterThan(partitionStatistics.getOnDiskDataSizeInBytes().getAsLong(), 0L);
        }
    }

    Path stagingPathRoot;
    try (Transaction transaction = newTransaction()) {
        ConnectorSession session = newSession();
        ConnectorMetadata metadata = transaction.getMetadata();
        ConnectorTableHandle tableHandle = getTableHandle(metadata, tableName);

        // "stage" insert data
        ConnectorInsertTableHandle insertTableHandle = metadata.beginInsert(session, tableHandle, ImmutableList.of(), NO_RETRIES);
        stagingPathRoot = getStagingPathRoot(insertTableHandle);
        ConnectorPageSink sink = pageSinkProvider.createPageSink(transaction.getTransactionHandle(), session, insertTableHandle);
        sink.appendPage(CREATE_TABLE_PARTITIONED_DATA_2ND.toPage());
        Collection<Slice> fragments = getFutureValue(sink.finish());
        metadata.finishInsert(session, insertTableHandle, fragments, ImmutableList.of());

        // verify all temp files start with the unique prefix
        HdfsContext context = new HdfsContext(session);
        Set<String> tempFiles = listAllDataFiles(context, getStagingPathRoot(insertTableHandle));
        assertFalse(tempFiles.isEmpty());
        for (String filePath : tempFiles) {
            assertThat(new Path(filePath).getName()).startsWith(session.getQueryId());
        }

        // rollback insert
        transaction.rollback();
    }

    // verify the data is unchanged
    try (Transaction transaction = newTransaction()) {
        ConnectorSession session = newSession();
        ConnectorMetadata metadata = transaction.getMetadata();
        metadata.beginQuery(session);
        ConnectorTableHandle tableHandle = getTableHandle(metadata, tableName);
        List<ColumnHandle> columnHandles = filterNonHiddenColumnHandles(metadata.getColumnHandles(session, tableHandle).values());
        MaterializedResult result = readTable(transaction, tableHandle, columnHandles, session, TupleDomain.all(), OptionalInt.empty(), Optional.empty());
        assertEqualsIgnoreOrder(result.getMaterializedRows(), CREATE_TABLE_PARTITIONED_DATA.getMaterializedRows());

        // verify we did not modify the table directory
        assertEquals(listAllDataFiles(transaction, tableName.getSchemaName(), tableName.getTableName()), existingFiles);

        // verify temp directory is empty
        HdfsContext context = new HdfsContext(session);
        assertTrue(listAllDataFiles(context, stagingPathRoot).isEmpty());
    }
}
use of io.trino.plugin.hive.metastore.Partition in project trino by trinodb.
the class RegisterPartitionProcedure method doRegisterPartition.
/**
 * Registers an existing directory as a new partition of the given table and commits the change.
 *
 * @param session session whose identity is used to obtain the metastore
 * @param accessControl access control consulted for insert permission on the table
 * @param schemaName schema of the target table
 * @param tableName name of the target table
 * @param partitionColumn partition column names
 * @param partitionValues partition values
 * @param location partition location, or {@code null} to use the partition name under the
 *        table's storage location
 * @throws TrinoException if the procedure is disabled, the partition is already registered,
 *         or the partition location does not exist
 * @throws TableNotFoundException if the table cannot be found in the metastore
 */
private void doRegisterPartition(ConnectorSession session, ConnectorAccessControl accessControl, String schemaName, String tableName, List<String> partitionColumn, List<String> partitionValues, String location) {
    if (!allowRegisterPartition) {
        throw new TrinoException(PERMISSION_DENIED, "register_partition procedure is disabled");
    }

    SemiTransactionalHiveMetastore metastore = hiveMetadataFactory.create(session.getIdentity(), true).getMetastore();
    HdfsContext hdfsContext = new HdfsContext(session);
    SchemaTableName schemaTableName = new SchemaTableName(schemaName, tableName);

    Table table = metastore.getTable(schemaName, tableName)
            .orElseThrow(() -> new TableNotFoundException(schemaTableName));

    accessControl.checkCanInsertIntoTable(null, schemaTableName);
    checkIsPartitionedTable(table);
    checkPartitionColumns(table, partitionColumn);

    // refuse to register a partition the metastore already knows about
    metastore.unsafeGetRawHiveMetastoreClosure()
            .getPartition(schemaName, tableName, partitionValues)
            .ifPresent(existing -> {
                throw new TrinoException(
                        ALREADY_EXISTS,
                        format(
                                "Partition [%s] is already registered with location %s",
                                FileUtils.makePartName(partitionColumn, partitionValues),
                                existing.getStorage().getLocation()));
            });

    // without an explicit location, use the partition name under the table location
    Path partitionLocation = (location == null)
            ? new Path(table.getStorage().getLocation(), FileUtils.makePartName(partitionColumn, partitionValues))
            : new Path(location);

    boolean locationExists = HiveWriteUtils.pathExists(hdfsContext, hdfsEnvironment, partitionLocation);
    if (!locationExists) {
        throw new TrinoException(INVALID_PROCEDURE_ARGUMENT, "Partition location does not exist: " + partitionLocation);
    }

    Partition newPartition = buildPartitionObject(session, table, partitionValues, partitionLocation);
    metastore.addPartition(
            session,
            table.getDatabaseName(),
            table.getTableName(),
            newPartition,
            partitionLocation,
            Optional.empty(), // no need for failed attempts cleanup
            PartitionStatistics.empty(),
            false);
    metastore.commit();
}
Aggregations