Use of io.prestosql.plugin.hive.HdfsEnvironment.HdfsContext in project hetu-core by openlookeng.
Class SyncPartitionMetadataProcedure, method doSyncPartitionMetadata. The procedure builds an HdfsContext for the target table, lists partition directories under the table location, and reconciles them against the partition names recorded in the metastore.
private void doSyncPartitionMetadata(ConnectorSession session, String schemaName, String tableName, String mode)
{
    SyncMode syncMode = toSyncMode(mode);
    HdfsContext hdfsContext = new HdfsContext(session, schemaName, tableName);
    HiveIdentity identity = new HiveIdentity(session);
    SemiTransactionalHiveMetastore metastore = ((HiveMetadata) hiveMetadataFactory.get()).getMetastore();
    SchemaTableName schemaTableName = new SchemaTableName(schemaName, tableName);
    Table table = metastore.getTable(identity, schemaName, tableName)
            .orElseThrow(() -> new TableNotFoundException(schemaTableName));
    if (table.getPartitionColumns().isEmpty()) {
        throw new PrestoException(INVALID_PROCEDURE_ARGUMENT, "Table is not partitioned: " + schemaTableName);
    }
    Path tableLocation = new Path(table.getStorage().getLocation());
    Set<String> partitionsToAdd;
    Set<String> partitionsToDrop;
    try {
        FileSystem fileSystem = hdfsEnvironment.getFileSystem(hdfsContext, tableLocation);
        List<String> partitionsInMetastore = metastore.getPartitionNames(identity, schemaName, tableName)
                .orElseThrow(() -> new TableNotFoundException(schemaTableName));
        List<String> partitionsInFileSystem = listDirectory(fileSystem, fileSystem.getFileStatus(tableLocation), table.getPartitionColumns(), table.getPartitionColumns().size()).stream()
                .map(fileStatus -> fileStatus.getPath().toUri())
                .map(uri -> tableLocation.toUri().relativize(uri).getPath())
                .collect(toImmutableList());
        // partitions in file system but not in metastore
        partitionsToAdd = difference(partitionsInFileSystem, partitionsInMetastore);
        // partitions in metastore but not in file system
        partitionsToDrop = difference(partitionsInMetastore, partitionsInFileSystem);
    }
    catch (IOException e) {
        throw new PrestoException(HIVE_FILESYSTEM_ERROR, e);
    }
    syncPartitions(partitionsToAdd, partitionsToDrop, syncMode, metastore, session, table);
}
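The interesting step above is the two-way diff between partition paths discovered on the file system and partition names recorded in the metastore. The helper below is a minimal, hypothetical sketch of that diff using plain java.util sets; the actual difference helper in the procedure may be implemented differently (for example via Guava), so this only illustrates the add/drop split.

import java.util.HashSet;
import java.util.List;
import java.util.Set;

// Hypothetical sketch, not the hetu-core implementation: compute which partitions
// must be added to or dropped from the metastore, given two partition-name lists.
final class PartitionDiffSketch
{
    private PartitionDiffSketch() {}

    // Elements of 'left' that are absent from 'right'.
    static Set<String> difference(List<String> left, List<String> right)
    {
        Set<String> result = new HashSet<>(left);
        result.removeAll(right);
        return result;
    }

    public static void main(String[] args)
    {
        List<String> inFileSystem = List.of("ds=2024-01-01", "ds=2024-01-02");
        List<String> inMetastore = List.of("ds=2024-01-01", "ds=2024-01-03");
        System.out.println("to add:  " + difference(inFileSystem, inMetastore)); // [ds=2024-01-02]
        System.out.println("to drop: " + difference(inMetastore, inFileSystem)); // [ds=2024-01-03]
    }
}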
Use of io.prestosql.plugin.hive.HdfsEnvironment.HdfsContext in project hetu-core by openlookeng.
Class AbstractTestHive, method doInsertIntoNewPartition. The test inserts into a newly created partition, verifies partition parameters and statistics, stages a second insert whose temp files it inspects through an HdfsContext, and rolls that insert back.
private void doInsertIntoNewPartition(HiveStorageFormat storageFormat, SchemaTableName tableName)
        throws Exception
{
    // creating the table
    doCreateEmptyTable(tableName, storageFormat, CREATE_TABLE_COLUMNS_PARTITIONED);
    // insert the data
    String queryId = insertData(tableName, CREATE_TABLE_PARTITIONED_DATA);
    Set<String> existingFiles;
    try (Transaction transaction = newTransaction()) {
        // verify partitions were created
        HiveIdentity identity = new HiveIdentity(newSession());
        List<String> partitionNames = transaction.getMetastore(tableName.getSchemaName())
                .getPartitionNames(identity, tableName.getSchemaName(), tableName.getTableName())
                .orElseThrow(() -> new AssertionError("Table does not exist: " + tableName));
        assertEqualsIgnoreOrder(partitionNames, CREATE_TABLE_PARTITIONED_DATA.getMaterializedRows().stream()
                .map(row -> "ds=" + row.getField(CREATE_TABLE_PARTITIONED_DATA.getTypes().size() - 1))
                .collect(toList()));
        // verify the node versions in partitions
        Map<String, Optional<Partition>> partitions = getMetastoreClient().getPartitionsByNames(identity, tableName.getSchemaName(), tableName.getTableName(), partitionNames);
        assertEquals(partitions.size(), partitionNames.size());
        for (String partitionName : partitionNames) {
            Partition partition = partitions.get(partitionName).get();
            assertEquals(partition.getParameters().get(PRESTO_VERSION_NAME), TEST_SERVER_VERSION);
            assertEquals(partition.getParameters().get(PRESTO_QUERY_ID_NAME), queryId);
        }
        // load the new table
        ConnectorSession session = newSession();
        ConnectorMetadata metadata = transaction.getMetadata();
        metadata.beginQuery(session);
        ConnectorTableHandle tableHandle = getTableHandle(metadata, tableName);
        List<ColumnHandle> columnHandles = filterNonHiddenColumnHandles(metadata.getColumnHandles(session, tableHandle).values());
        // verify the data
        MaterializedResult result = readTable(transaction, tableHandle, columnHandles, session, TupleDomain.all(), OptionalInt.empty(), Optional.of(storageFormat));
        assertEqualsIgnoreOrder(result.getMaterializedRows(), CREATE_TABLE_PARTITIONED_DATA.getMaterializedRows());
        // test rollback
        existingFiles = listAllDataFiles(transaction, tableName.getSchemaName(), tableName.getTableName());
        assertFalse(existingFiles.isEmpty());
        // test statistics
        for (String partitionName : partitionNames) {
            HiveBasicStatistics partitionStatistics = getBasicStatisticsForPartition(session, transaction, tableName, partitionName);
            assertEquals(partitionStatistics.getRowCount().getAsLong(), 1L);
            assertEquals(partitionStatistics.getFileCount().getAsLong(), 1L);
            assertGreaterThan(partitionStatistics.getInMemoryDataSizeInBytes().getAsLong(), 0L);
            assertGreaterThan(partitionStatistics.getOnDiskDataSizeInBytes().getAsLong(), 0L);
        }
    }
    Path stagingPathRoot;
    try (Transaction transaction = newTransaction()) {
        ConnectorSession session = newSession();
        ConnectorMetadata metadata = transaction.getMetadata();
        ConnectorTableHandle tableHandle = getTableHandle(metadata, tableName);
        metadata.beginQuery(session);
        // "stage" insert data
        ConnectorInsertTableHandle insertTableHandle = metadata.beginInsert(session, tableHandle);
        stagingPathRoot = getStagingPathRoot(insertTableHandle);
        ConnectorPageSink sink = pageSinkProvider.createPageSink(transaction.getTransactionHandle(), session, insertTableHandle);
        sink.appendPage(CREATE_TABLE_PARTITIONED_DATA_2ND.toPage());
        Collection<Slice> fragments = getFutureValue(sink.finish());
        metadata.finishInsert(session, insertTableHandle, fragments, ImmutableList.of());
        // verify all temp files start with the unique prefix
        HdfsContext context = new HdfsContext(session, tableName.getSchemaName(), tableName.getTableName());
        Set<String> tempFiles = listAllDataFiles(context, getStagingPathRoot(insertTableHandle));
        assertFalse(tempFiles.isEmpty());
        for (String filePath : tempFiles) {
            assertThat(new Path(filePath).getName()).startsWith(session.getQueryId());
        }
        // rollback insert
        transaction.rollback();
    }
    // verify the data is unchanged
    try (Transaction transaction = newTransaction()) {
        ConnectorSession session = newSession();
        ConnectorMetadata metadata = transaction.getMetadata();
        metadata.beginQuery(session);
        ConnectorTableHandle tableHandle = getTableHandle(metadata, tableName);
        List<ColumnHandle> columnHandles = filterNonHiddenColumnHandles(metadata.getColumnHandles(session, tableHandle).values());
        MaterializedResult result = readTable(transaction, tableHandle, columnHandles, session, TupleDomain.all(), OptionalInt.empty(), Optional.empty());
        assertEqualsIgnoreOrder(result.getMaterializedRows(), CREATE_TABLE_PARTITIONED_DATA.getMaterializedRows());
        // verify we did not modify the table directory
        assertEquals(listAllDataFiles(transaction, tableName.getSchemaName(), tableName.getTableName()), existingFiles);
        // verify temp directory is empty
        HdfsContext context = new HdfsContext(session, tableName.getSchemaName(), tableName.getTableName());
        assertTrue(listAllDataFiles(context, stagingPathRoot).isEmpty());
    }
}
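One detail this test relies on is that every staged write file is named with the query ID as a prefix, which is what lets a rollback discard them safely. Below is a minimal sketch of that prefix check written against plain java.nio paths instead of Hadoop's Path and HdfsContext; the file names and query ID are made up for illustration.

import java.nio.file.Path;
import java.util.List;

// Hypothetical sketch: check that every staged file name starts with the query ID.
final class StagingPrefixCheckSketch
{
    private StagingPrefixCheckSketch() {}

    static boolean allFilesCarryPrefix(List<Path> stagedFiles, String queryId)
    {
        return stagedFiles.stream()
                .map(path -> path.getFileName().toString())
                .allMatch(name -> name.startsWith(queryId));
    }

    public static void main(String[] args)
    {
        // Made-up staging paths and query ID, standing in for the connector's real ones.
        List<Path> staged = List.of(
                Path.of("/tmp/presto-staging/20240101_123456_00001_abcde_bucket-0"),
                Path.of("/tmp/presto-staging/20240101_123456_00001_abcde_bucket-1"));
        System.out.println(allFilesCarryPrefix(staged, "20240101_123456_00001_abcde")); // true
    }
}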
Use of io.prestosql.plugin.hive.HdfsEnvironment.HdfsContext in project hetu-core by openlookeng.
Class AbstractTestHive, method doInsertIntoExistingPartition. The test inserts the same data three times into an existing partition, checks the accumulated rows and statistics, then stages and rolls back a further insert, verifying the staging directory through an HdfsContext.
private void doInsertIntoExistingPartition(HiveStorageFormat storageFormat, SchemaTableName tableName)
        throws Exception
{
    // creating the table
    doCreateEmptyTable(tableName, storageFormat, CREATE_TABLE_COLUMNS_PARTITIONED);
    MaterializedResult.Builder resultBuilder = MaterializedResult.resultBuilder(SESSION, CREATE_TABLE_PARTITIONED_DATA.getTypes());
    for (int i = 0; i < 3; i++) {
        // insert the data
        insertData(tableName, CREATE_TABLE_PARTITIONED_DATA);
        try (Transaction transaction = newTransaction()) {
            ConnectorSession session = newSession();
            ConnectorMetadata metadata = transaction.getMetadata();
            metadata.beginQuery(session);
            ConnectorTableHandle tableHandle = getTableHandle(metadata, tableName);
            // verify partitions were created
            List<String> partitionNames = transaction.getMetastore(tableName.getSchemaName())
                    .getPartitionNames(new HiveIdentity(session), tableName.getSchemaName(), tableName.getTableName())
                    .orElseThrow(() -> new AssertionError("Table does not exist: " + tableName));
            assertEqualsIgnoreOrder(partitionNames, CREATE_TABLE_PARTITIONED_DATA.getMaterializedRows().stream()
                    .map(row -> "ds=" + row.getField(CREATE_TABLE_PARTITIONED_DATA.getTypes().size() - 1))
                    .collect(toList()));
            // load the new table
            List<ColumnHandle> columnHandles = filterNonHiddenColumnHandles(metadata.getColumnHandles(session, tableHandle).values());
            // verify the data
            resultBuilder.rows(CREATE_TABLE_PARTITIONED_DATA.getMaterializedRows());
            MaterializedResult result = readTable(transaction, tableHandle, columnHandles, session, TupleDomain.all(), OptionalInt.empty(), Optional.of(storageFormat));
            assertEqualsIgnoreOrder(result.getMaterializedRows(), resultBuilder.build().getMaterializedRows());
            // test statistics
            for (String partitionName : partitionNames) {
                HiveBasicStatistics statistics = getBasicStatisticsForPartition(session, transaction, tableName, partitionName);
                assertEquals(statistics.getRowCount().getAsLong(), i + 1L);
                assertEquals(statistics.getFileCount().getAsLong(), i + 1L);
                assertGreaterThan(statistics.getInMemoryDataSizeInBytes().getAsLong(), 0L);
                assertGreaterThan(statistics.getOnDiskDataSizeInBytes().getAsLong(), 0L);
            }
        }
    }
    // test rollback
    Set<String> existingFiles;
    Path stagingPathRoot;
    try (Transaction transaction = newTransaction()) {
        ConnectorMetadata metadata = transaction.getMetadata();
        ConnectorSession session = newSession();
        existingFiles = listAllDataFiles(transaction, tableName.getSchemaName(), tableName.getTableName());
        assertFalse(existingFiles.isEmpty());
        ConnectorTableHandle tableHandle = getTableHandle(metadata, tableName);
        metadata.beginQuery(session);
        // "stage" insert data
        ConnectorInsertTableHandle insertTableHandle = metadata.beginInsert(session, tableHandle);
        stagingPathRoot = getStagingPathRoot(insertTableHandle);
        ConnectorPageSink sink = pageSinkProvider.createPageSink(transaction.getTransactionHandle(), session, insertTableHandle);
        sink.appendPage(CREATE_TABLE_PARTITIONED_DATA.toPage());
        sink.appendPage(CREATE_TABLE_PARTITIONED_DATA.toPage());
        Collection<Slice> fragments = getFutureValue(sink.finish());
        metadata.finishInsert(session, insertTableHandle, fragments, ImmutableList.of());
        // verify all temp files start with the unique prefix
        HdfsContext context = new HdfsContext(session, tableName.getSchemaName(), tableName.getTableName());
        Set<String> tempFiles = listAllDataFiles(context, getStagingPathRoot(insertTableHandle));
        assertFalse(tempFiles.isEmpty());
        for (String filePath : tempFiles) {
            assertThat(new Path(filePath).getName()).startsWith(session.getQueryId());
        }
        // verify statistics are visible from within the current transaction
        List<String> partitionNames = transaction.getMetastore(tableName.getSchemaName())
                .getPartitionNames(new HiveIdentity(session), tableName.getSchemaName(), tableName.getTableName())
                .orElseThrow(() -> new AssertionError("Table does not exist: " + tableName));
        for (String partitionName : partitionNames) {
            HiveBasicStatistics partitionStatistics = getBasicStatisticsForPartition(session, transaction, tableName, partitionName);
            assertEquals(partitionStatistics.getRowCount().getAsLong(), 5L);
        }
        // rollback insert
        transaction.rollback();
    }
    try (Transaction transaction = newTransaction()) {
        ConnectorMetadata metadata = transaction.getMetadata();
        ConnectorSession session = newSession();
        metadata.beginQuery(session);
        ConnectorTableHandle tableHandle = getTableHandle(metadata, tableName);
        List<ColumnHandle> columnHandles = filterNonHiddenColumnHandles(metadata.getColumnHandles(session, tableHandle).values());
        // verify the data is unchanged
        MaterializedResult result = readTable(transaction, tableHandle, columnHandles, session, TupleDomain.all(), OptionalInt.empty(), Optional.empty());
        assertEqualsIgnoreOrder(result.getMaterializedRows(), resultBuilder.build().getMaterializedRows());
        // verify we did not modify the table directory
        assertEquals(listAllDataFiles(transaction, tableName.getSchemaName(), tableName.getTableName()), existingFiles);
        // verify temp directory is empty
        HdfsContext hdfsContext = new HdfsContext(session, tableName.getSchemaName(), tableName.getTableName());
        assertTrue(listAllDataFiles(hdfsContext, stagingPathRoot).isEmpty());
        // verify statistics have been rolled back
        HiveIdentity identity = new HiveIdentity(session);
        List<String> partitionNames = transaction.getMetastore(tableName.getSchemaName())
                .getPartitionNames(identity, tableName.getSchemaName(), tableName.getTableName())
                .orElseThrow(() -> new AssertionError("Table does not exist: " + tableName));
        for (String partitionName : partitionNames) {
            HiveBasicStatistics partitionStatistics = getBasicStatisticsForPartition(session, transaction, tableName, partitionName);
            assertEquals(partitionStatistics.getRowCount().getAsLong(), 3L);
        }
    }
}
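The statistics assertions above follow simple arithmetic: each of the three committed inserts adds one row per partition, the staged insert appends the same page twice, and the rollback discards both staged pages. The sketch below just restates that bookkeeping; the constants mirror the 5L and 3L assertions in the test rather than any general rule.

// Sketch of the row-count arithmetic asserted above.
final class PartitionRowCountSketch
{
    public static void main(String[] args)
    {
        int committedInserts = 3;         // the loop above inserts the data three times
        int stagedPages = 2;              // two appendPage calls before the rollback
        int rowsPerPagePerPartition = 1;  // one row per partition in each inserted page

        int visibleInsideTransaction = committedInserts + stagedPages * rowsPerPagePerPartition;
        int visibleAfterRollback = committedInserts;

        System.out.println(visibleInsideTransaction); // 5, the in-transaction assertion
        System.out.println(visibleAfterRollback);     // 3, the post-rollback assertion
    }
}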
Use of io.prestosql.plugin.hive.HdfsEnvironment.HdfsContext in project hetu-core by openlookeng.
Class AbstractTestHive, method assertEmptyFile. The test uses an HdfsContext to inspect the table location, places a zero-byte file there, and verifies that reading the table returns no rows.
private void assertEmptyFile(HiveStorageFormat format)
        throws Exception
{
    SchemaTableName tableName = temporaryTable("empty_file");
    try {
        List<Column> columns = ImmutableList.of(new Column("test", HIVE_STRING, Optional.empty()));
        createEmptyTable(tableName, format, columns, ImmutableList.of());
        try (Transaction transaction = newTransaction()) {
            ConnectorSession session = newSession();
            ConnectorMetadata metadata = transaction.getMetadata();
            metadata.beginQuery(session);
            ConnectorTableHandle tableHandle = getTableHandle(metadata, tableName);
            List<ColumnHandle> columnHandles = filterNonHiddenColumnHandles(metadata.getColumnHandles(session, tableHandle).values());
            Table table = transaction.getMetastore(tableName.getSchemaName())
                    .getTable(new HiveIdentity(session), tableName.getSchemaName(), tableName.getTableName())
                    .orElseThrow(AssertionError::new);
            // verify directory is empty
            HdfsContext context = new HdfsContext(session, tableName.getSchemaName(), tableName.getTableName());
            Path location = new Path(table.getStorage().getLocation());
            assertTrue(listDirectory(context, location).isEmpty());
            // read table with empty directory
            readTable(transaction, tableHandle, columnHandles, session, TupleDomain.all(), OptionalInt.of(0), Optional.of(ORC));
            // create empty file
            FileSystem fileSystem = hdfsEnvironment.getFileSystem(context, location);
            assertTrue(fileSystem.createNewFile(new Path(location, "empty-file")));
            assertEquals(listDirectory(context, location), ImmutableList.of("empty-file"));
            // read table with empty file
            MaterializedResult result = readTable(transaction, tableHandle, columnHandles, session, TupleDomain.all(), OptionalInt.of(1), Optional.empty());
            assertEquals(result.getRowCount(), 0);
        }
    }
    finally {
        dropTable(tableName);
    }
}
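The same setup is easy to reproduce against a local directory to see why the assertion holds: an empty file is a valid, zero-length object in the table location, so it shows up in the listing but contributes no rows. Below is a hypothetical local-filesystem analogue using java.nio instead of the Hadoop FileSystem API; it does not touch HDFS or the Hive connector.

import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.List;
import java.util.stream.Collectors;
import java.util.stream.Stream;

// Hypothetical analogue of the empty-file setup above, run on the local file system.
final class EmptyFileSetupSketch
{
    public static void main(String[] args) throws IOException
    {
        Path location = Files.createTempDirectory("empty_file_table");
        Path emptyFile = Files.createFile(location.resolve("empty-file"));

        try (Stream<Path> entries = Files.list(location)) {
            List<String> names = entries
                    .map(path -> path.getFileName().toString())
                    .collect(Collectors.toList());
            System.out.println(names);                 // [empty-file]
            System.out.println(Files.size(emptyFile)); // 0 bytes, so a table scan yields no rows
        }
    }
}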
Use of io.prestosql.plugin.hive.HdfsEnvironment.HdfsContext in project hetu-core by openlookeng.
Class AbstractTestHive, method doInsert. The test inserts into an unpartitioned table three times, stages and rolls back a further insert, and uses an HdfsContext to confirm that the staging directory is cleaned up and the table data and statistics are unchanged.
private void doInsert(HiveStorageFormat storageFormat, SchemaTableName tableName)
        throws Exception
{
    // creating the table
    doCreateEmptyTable(tableName, storageFormat, CREATE_TABLE_COLUMNS);
    MaterializedResult.Builder resultBuilder = MaterializedResult.resultBuilder(SESSION, CREATE_TABLE_DATA.getTypes());
    for (int i = 0; i < 3; i++) {
        insertData(tableName, CREATE_TABLE_DATA);
        try (Transaction transaction = newTransaction()) {
            ConnectorSession session = newSession();
            ConnectorMetadata metadata = transaction.getMetadata();
            metadata.beginQuery(session);
            // load the new table
            ConnectorTableHandle tableHandle = getTableHandle(metadata, tableName);
            List<ColumnHandle> columnHandles = filterNonHiddenColumnHandles(metadata.getColumnHandles(session, tableHandle).values());
            // verify the metadata
            ConnectorTableMetadata tableMetadata = metadata.getTableMetadata(session, getTableHandle(metadata, tableName));
            assertEquals(filterNonHiddenColumnMetadata(tableMetadata.getColumns()), CREATE_TABLE_COLUMNS);
            // verify the data
            resultBuilder.rows(CREATE_TABLE_DATA.getMaterializedRows());
            MaterializedResult result = readTable(transaction, tableHandle, columnHandles, session, TupleDomain.all(), OptionalInt.empty(), Optional.empty());
            assertEqualsIgnoreOrder(result.getMaterializedRows(), resultBuilder.build().getMaterializedRows());
            // statistics
            HiveBasicStatistics tableStatistics = getBasicStatisticsForTable(session, transaction, tableName);
            assertEquals(tableStatistics.getRowCount().getAsLong(), CREATE_TABLE_DATA.getRowCount() * (i + 1));
            assertEquals(tableStatistics.getFileCount().getAsLong(), i + 1L);
            assertGreaterThan(tableStatistics.getInMemoryDataSizeInBytes().getAsLong(), 0L);
            assertGreaterThan(tableStatistics.getOnDiskDataSizeInBytes().getAsLong(), 0L);
        }
    }
    // test rollback
    Set<String> existingFiles;
    try (Transaction transaction = newTransaction()) {
        existingFiles = listAllDataFiles(transaction, tableName.getSchemaName(), tableName.getTableName());
        assertFalse(existingFiles.isEmpty());
    }
    Path stagingPathRoot;
    try (Transaction transaction = newTransaction()) {
        ConnectorSession session = newSession();
        ConnectorMetadata metadata = transaction.getMetadata();
        ConnectorTableHandle tableHandle = getTableHandle(metadata, tableName);
        // "stage" insert data
        metadata.beginQuery(session);
        ConnectorInsertTableHandle insertTableHandle = metadata.beginInsert(session, tableHandle);
        ConnectorPageSink sink = pageSinkProvider.createPageSink(transaction.getTransactionHandle(), session, insertTableHandle);
        sink.appendPage(CREATE_TABLE_DATA.toPage());
        sink.appendPage(CREATE_TABLE_DATA.toPage());
        Collection<Slice> fragments = getFutureValue(sink.finish());
        metadata.finishInsert(session, insertTableHandle, fragments, ImmutableList.of());
        // statistics, visible from within transaction
        HiveBasicStatistics tableStatistics = getBasicStatisticsForTable(session, transaction, tableName);
        assertEquals(tableStatistics.getRowCount().getAsLong(), CREATE_TABLE_DATA.getRowCount() * 5L);
        try (Transaction otherTransaction = newTransaction()) {
            // statistics, not visible from outside transaction
            HiveBasicStatistics otherTableStatistics = getBasicStatisticsForTable(session, otherTransaction, tableName);
            assertEquals(otherTableStatistics.getRowCount().getAsLong(), CREATE_TABLE_DATA.getRowCount() * 3L);
        }
        // verify all temp files start with the unique prefix
        stagingPathRoot = getStagingPathRoot(insertTableHandle);
        HdfsContext context = new HdfsContext(session, tableName.getSchemaName(), tableName.getTableName());
        Set<String> tempFiles = listAllDataFiles(context, stagingPathRoot);
        assertFalse(tempFiles.isEmpty());
        for (String filePath : tempFiles) {
            assertThat(new Path(filePath).getName()).startsWith(session.getQueryId());
        }
        // rollback insert
        transaction.rollback();
    }
    // verify temp directory is empty
    HdfsContext context = new HdfsContext(newSession(), tableName.getSchemaName(), tableName.getTableName());
    assertTrue(listAllDataFiles(context, stagingPathRoot).isEmpty());
    // verify the data is unchanged
    try (Transaction transaction = newTransaction()) {
        ConnectorSession session = newSession();
        ConnectorMetadata metadata = transaction.getMetadata();
        metadata.beginQuery(session);
        ConnectorTableHandle tableHandle = getTableHandle(metadata, tableName);
        List<ColumnHandle> columnHandles = filterNonHiddenColumnHandles(metadata.getColumnHandles(session, tableHandle).values());
        MaterializedResult result = readTable(transaction, tableHandle, columnHandles, session, TupleDomain.all(), OptionalInt.empty(), Optional.empty());
        assertEqualsIgnoreOrder(result.getMaterializedRows(), resultBuilder.build().getMaterializedRows());
        // verify we did not modify the table directory
        assertEquals(listAllDataFiles(transaction, tableName.getSchemaName(), tableName.getTableName()), existingFiles);
    }
    // verify statistics unchanged
    try (Transaction transaction = newTransaction()) {
        ConnectorSession session = newSession();
        HiveBasicStatistics statistics = getBasicStatisticsForTable(session, transaction, tableName);
        assertEquals(statistics.getRowCount().getAsLong(), CREATE_TABLE_DATA.getRowCount() * 3L);
        assertEquals(statistics.getFileCount().getAsLong(), 3L);
    }
}
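The nested transactions make one point that is easy to miss: statistics written by the staged insert are visible only inside the writing transaction, while a concurrent transaction keeps seeing the last committed values until the writer commits (here it rolls back instead). The toy model below illustrates only that visibility rule; it is not the connector's SemiTransactionalHiveMetastore, and rowsPerInsert is a placeholder standing in for CREATE_TABLE_DATA.getRowCount().

import java.util.HashMap;
import java.util.Map;

// Toy model of committed vs. staged statistics; an illustration, not the real metastore.
final class StatsVisibilitySketch
{
    private final Map<String, Long> committedRowCounts = new HashMap<>();
    private final Map<String, Long> stagedRowCounts = new HashMap<>();

    void stageInsert(String table, long rows)
    {
        stagedRowCounts.merge(table, rows, Long::sum);
    }

    long rowCount(String table, boolean insideWritingTransaction)
    {
        long committed = committedRowCounts.getOrDefault(table, 0L);
        return insideWritingTransaction
                ? committed + stagedRowCounts.getOrDefault(table, 0L)
                : committed;
    }

    void rollback()
    {
        stagedRowCounts.clear();
    }

    public static void main(String[] args)
    {
        long rowsPerInsert = 10;   // placeholder for CREATE_TABLE_DATA.getRowCount()
        StatsVisibilitySketch model = new StatsVisibilitySketch();
        model.committedRowCounts.put("test_insert", 3 * rowsPerInsert); // three committed inserts
        model.stageInsert("test_insert", 2 * rowsPerInsert);            // two staged pages

        System.out.println(model.rowCount("test_insert", true));  // 5 * rowsPerInsert, inside the writing transaction
        System.out.println(model.rowCount("test_insert", false)); // 3 * rowsPerInsert, what otherTransaction sees
        model.rollback();
        System.out.println(model.rowCount("test_insert", true));  // back to 3 * rowsPerInsert
    }
}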