use of io.prestosql.spi.connector.ConnectorPageSink in project hetu-core by openlookeng.
the class AbstractTestHiveFileSystem method createTable.
private void createTable(SchemaTableName tableName, HiveStorageFormat storageFormat) throws Exception {
List<ColumnMetadata> columns = ImmutableList.<ColumnMetadata>builder().add(new ColumnMetadata("id", BIGINT)).build();
MaterializedResult data = MaterializedResult.resultBuilder(newSession(), BIGINT).row(1L).row(3L).row(2L).build();
try (Transaction transaction = newTransaction()) {
ConnectorMetadata metadata = transaction.getMetadata();
ConnectorSession session = newSession();
// begin creating the table
ConnectorTableMetadata tableMetadata = new ConnectorTableMetadata(tableName, columns, createTableProperties(storageFormat));
ConnectorOutputTableHandle outputHandle = metadata.beginCreateTable(session, tableMetadata, Optional.empty());
// write the records
ConnectorPageSink sink = pageSinkProvider.createPageSink(transaction.getTransactionHandle(), session, outputHandle);
sink.appendPage(data.toPage());
Collection<Slice> fragments = getFutureValue(sink.finish());
// commit the table
metadata.finishCreateTable(session, outputHandle, fragments, ImmutableList.of());
transaction.commit();
// Hack to work around the metastore not being configured for S3 or other FS.
// The metastore tries to validate the location when creating the
// table, which fails without explicit configuration for file system.
// We work around that by using a dummy location when creating the
// table and update it here to the correct location.
metastoreClient.updateTableLocation(database, tableName.getTableName(), locationService.getTableWriteInfo(((HiveOutputTableHandle) outputHandle).getLocationHandle(), false).getTargetPath().toString());
}
try (Transaction transaction = newTransaction()) {
ConnectorMetadata metadata = transaction.getMetadata();
ConnectorSession session = newSession();
// load the new table
ConnectorTableHandle tableHandle = getTableHandle(metadata, tableName);
List<ColumnHandle> columnHandles = filterNonHiddenColumnHandles(metadata.getColumnHandles(session, tableHandle).values());
// verify the metadata
ConnectorTableMetadata tableMetadata = metadata.getTableMetadata(session, getTableHandle(metadata, tableName));
assertEquals(filterNonHiddenColumnMetadata(tableMetadata.getColumns()), columns);
// verify the data
ConnectorSplitSource splitSource = splitManager.getSplits(transaction.getTransactionHandle(), session, tableHandle, UNGROUPED_SCHEDULING);
ConnectorSplit split = getOnlyElement(getAllSplits(splitSource));
try (ConnectorPageSource pageSource = pageSourceProvider.createPageSource(transaction.getTransactionHandle(), session, split, tableHandle, columnHandles)) {
MaterializedResult result = materializeSourceDataStream(session, pageSource, getTypes(columnHandles));
assertEqualsIgnoreOrder(result.getMaterializedRows(), data.getMaterializedRows());
}
}
}
use of io.prestosql.spi.connector.ConnectorPageSink in project hetu-core by openlookeng.
the class AbstractTestHive method doInsertIntoExistingPartition.
private void doInsertIntoExistingPartition(HiveStorageFormat storageFormat, SchemaTableName tableName) throws Exception {
// creating the table
doCreateEmptyTable(tableName, storageFormat, CREATE_TABLE_COLUMNS_PARTITIONED);
MaterializedResult.Builder resultBuilder = MaterializedResult.resultBuilder(SESSION, CREATE_TABLE_PARTITIONED_DATA.getTypes());
for (int i = 0; i < 3; i++) {
// insert the data
insertData(tableName, CREATE_TABLE_PARTITIONED_DATA);
try (Transaction transaction = newTransaction()) {
ConnectorSession session = newSession();
ConnectorMetadata metadata = transaction.getMetadata();
metadata.beginQuery(session);
ConnectorTableHandle tableHandle = getTableHandle(metadata, tableName);
// verify partitions were created
List<String> partitionNames = transaction.getMetastore(tableName.getSchemaName()).getPartitionNames(new HiveIdentity(session), tableName.getSchemaName(), tableName.getTableName()).orElseThrow(() -> new AssertionError("Table does not exist: " + tableName));
assertEqualsIgnoreOrder(partitionNames, CREATE_TABLE_PARTITIONED_DATA.getMaterializedRows().stream().map(row -> "ds=" + row.getField(CREATE_TABLE_PARTITIONED_DATA.getTypes().size() - 1)).collect(toList()));
// load the new table
List<ColumnHandle> columnHandles = filterNonHiddenColumnHandles(metadata.getColumnHandles(session, tableHandle).values());
// verify the data
resultBuilder.rows(CREATE_TABLE_PARTITIONED_DATA.getMaterializedRows());
MaterializedResult result = readTable(transaction, tableHandle, columnHandles, session, TupleDomain.all(), OptionalInt.empty(), Optional.of(storageFormat));
assertEqualsIgnoreOrder(result.getMaterializedRows(), resultBuilder.build().getMaterializedRows());
// test statistics
for (String partitionName : partitionNames) {
HiveBasicStatistics statistics = getBasicStatisticsForPartition(session, transaction, tableName, partitionName);
assertEquals(statistics.getRowCount().getAsLong(), i + 1L);
assertEquals(statistics.getFileCount().getAsLong(), i + 1L);
assertGreaterThan(statistics.getInMemoryDataSizeInBytes().getAsLong(), 0L);
assertGreaterThan(statistics.getOnDiskDataSizeInBytes().getAsLong(), 0L);
}
}
}
// test rollback
Set<String> existingFiles;
Path stagingPathRoot;
try (Transaction transaction = newTransaction()) {
ConnectorMetadata metadata = transaction.getMetadata();
ConnectorSession session = newSession();
existingFiles = listAllDataFiles(transaction, tableName.getSchemaName(), tableName.getTableName());
assertFalse(existingFiles.isEmpty());
ConnectorTableHandle tableHandle = getTableHandle(metadata, tableName);
metadata.beginQuery(session);
// "stage" insert data
ConnectorInsertTableHandle insertTableHandle = metadata.beginInsert(session, tableHandle);
stagingPathRoot = getStagingPathRoot(insertTableHandle);
ConnectorPageSink sink = pageSinkProvider.createPageSink(transaction.getTransactionHandle(), session, insertTableHandle);
sink.appendPage(CREATE_TABLE_PARTITIONED_DATA.toPage());
sink.appendPage(CREATE_TABLE_PARTITIONED_DATA.toPage());
Collection<Slice> fragments = getFutureValue(sink.finish());
metadata.finishInsert(session, insertTableHandle, fragments, ImmutableList.of());
// verify all temp files start with the unique prefix
HdfsContext context = new HdfsContext(session, tableName.getSchemaName(), tableName.getTableName());
Set<String> tempFiles = listAllDataFiles(context, getStagingPathRoot(insertTableHandle));
assertTrue(!tempFiles.isEmpty());
for (String filePath : tempFiles) {
assertThat(new Path(filePath).getName()).startsWith(session.getQueryId());
}
// verify statistics are visible from within of the current transaction
List<String> partitionNames = transaction.getMetastore(tableName.getSchemaName()).getPartitionNames(new HiveIdentity(session), tableName.getSchemaName(), tableName.getTableName()).orElseThrow(() -> new AssertionError("Table does not exist: " + tableName));
for (String partitionName : partitionNames) {
HiveBasicStatistics partitionStatistics = getBasicStatisticsForPartition(session, transaction, tableName, partitionName);
assertEquals(partitionStatistics.getRowCount().getAsLong(), 5L);
}
// rollback insert
transaction.rollback();
}
try (Transaction transaction = newTransaction()) {
ConnectorMetadata metadata = transaction.getMetadata();
ConnectorSession session = newSession();
metadata.beginQuery(session);
ConnectorTableHandle tableHandle = getTableHandle(metadata, tableName);
List<ColumnHandle> columnHandles = filterNonHiddenColumnHandles(metadata.getColumnHandles(session, tableHandle).values());
// verify the data is unchanged
MaterializedResult result = readTable(transaction, tableHandle, columnHandles, session, TupleDomain.all(), OptionalInt.empty(), Optional.empty());
assertEqualsIgnoreOrder(result.getMaterializedRows(), resultBuilder.build().getMaterializedRows());
// verify we did not modify the table directory
assertEquals(listAllDataFiles(transaction, tableName.getSchemaName(), tableName.getTableName()), existingFiles);
// verify temp directory is empty
HdfsContext hdfsContext = new HdfsContext(session, tableName.getSchemaName(), tableName.getTableName());
assertTrue(listAllDataFiles(hdfsContext, stagingPathRoot).isEmpty());
// verify statistics have been rolled back
HiveIdentity identity = new HiveIdentity(session);
List<String> partitionNames = transaction.getMetastore(tableName.getSchemaName()).getPartitionNames(identity, tableName.getSchemaName(), tableName.getTableName()).orElseThrow(() -> new AssertionError("Table does not exist: " + tableName));
for (String partitionName : partitionNames) {
HiveBasicStatistics partitionStatistics = getBasicStatisticsForPartition(session, transaction, tableName, partitionName);
assertEquals(partitionStatistics.getRowCount().getAsLong(), 3L);
}
}
}
use of io.prestosql.spi.connector.ConnectorPageSink in project hetu-core by openlookeng.
the class AbstractTestHive method doInsert.
private void doInsert(HiveStorageFormat storageFormat, SchemaTableName tableName) throws Exception {
// creating the table
doCreateEmptyTable(tableName, storageFormat, CREATE_TABLE_COLUMNS);
MaterializedResult.Builder resultBuilder = MaterializedResult.resultBuilder(SESSION, CREATE_TABLE_DATA.getTypes());
for (int i = 0; i < 3; i++) {
insertData(tableName, CREATE_TABLE_DATA);
try (Transaction transaction = newTransaction()) {
ConnectorSession session = newSession();
ConnectorMetadata metadata = transaction.getMetadata();
metadata.beginQuery(session);
// load the new table
ConnectorTableHandle tableHandle = getTableHandle(metadata, tableName);
List<ColumnHandle> columnHandles = filterNonHiddenColumnHandles(metadata.getColumnHandles(session, tableHandle).values());
// verify the metadata
ConnectorTableMetadata tableMetadata = metadata.getTableMetadata(session, getTableHandle(metadata, tableName));
assertEquals(filterNonHiddenColumnMetadata(tableMetadata.getColumns()), CREATE_TABLE_COLUMNS);
// verify the data
resultBuilder.rows(CREATE_TABLE_DATA.getMaterializedRows());
MaterializedResult result = readTable(transaction, tableHandle, columnHandles, session, TupleDomain.all(), OptionalInt.empty(), Optional.empty());
assertEqualsIgnoreOrder(result.getMaterializedRows(), resultBuilder.build().getMaterializedRows());
// statistics
HiveBasicStatistics tableStatistics = getBasicStatisticsForTable(session, transaction, tableName);
assertEquals(tableStatistics.getRowCount().getAsLong(), CREATE_TABLE_DATA.getRowCount() * (i + 1));
assertEquals(tableStatistics.getFileCount().getAsLong(), i + 1L);
assertGreaterThan(tableStatistics.getInMemoryDataSizeInBytes().getAsLong(), 0L);
assertGreaterThan(tableStatistics.getOnDiskDataSizeInBytes().getAsLong(), 0L);
}
}
// test rollback
Set<String> existingFiles;
try (Transaction transaction = newTransaction()) {
existingFiles = listAllDataFiles(transaction, tableName.getSchemaName(), tableName.getTableName());
assertFalse(existingFiles.isEmpty());
}
Path stagingPathRoot;
try (Transaction transaction = newTransaction()) {
ConnectorSession session = newSession();
ConnectorMetadata metadata = transaction.getMetadata();
ConnectorTableHandle tableHandle = getTableHandle(metadata, tableName);
// "stage" insert data
metadata.beginQuery(session);
ConnectorInsertTableHandle insertTableHandle = metadata.beginInsert(session, tableHandle);
ConnectorPageSink sink = pageSinkProvider.createPageSink(transaction.getTransactionHandle(), session, insertTableHandle);
sink.appendPage(CREATE_TABLE_DATA.toPage());
sink.appendPage(CREATE_TABLE_DATA.toPage());
Collection<Slice> fragments = getFutureValue(sink.finish());
metadata.finishInsert(session, insertTableHandle, fragments, ImmutableList.of());
// statistics, visible from within transaction
HiveBasicStatistics tableStatistics = getBasicStatisticsForTable(session, transaction, tableName);
assertEquals(tableStatistics.getRowCount().getAsLong(), CREATE_TABLE_DATA.getRowCount() * 5L);
try (Transaction otherTransaction = newTransaction()) {
// statistics, not visible from outside transaction
HiveBasicStatistics otherTableStatistics = getBasicStatisticsForTable(session, otherTransaction, tableName);
assertEquals(otherTableStatistics.getRowCount().getAsLong(), CREATE_TABLE_DATA.getRowCount() * 3L);
}
// verify all temp files start with the unique prefix
stagingPathRoot = getStagingPathRoot(insertTableHandle);
HdfsContext context = new HdfsContext(session, tableName.getSchemaName(), tableName.getTableName());
Set<String> tempFiles = listAllDataFiles(context, stagingPathRoot);
assertTrue(!tempFiles.isEmpty());
for (String filePath : tempFiles) {
assertThat(new Path(filePath).getName()).startsWith(session.getQueryId());
}
// rollback insert
transaction.rollback();
}
// verify temp directory is empty
HdfsContext context = new HdfsContext(newSession(), tableName.getSchemaName(), tableName.getTableName());
assertTrue(listAllDataFiles(context, stagingPathRoot).isEmpty());
// verify the data is unchanged
try (Transaction transaction = newTransaction()) {
ConnectorSession session = newSession();
ConnectorMetadata metadata = transaction.getMetadata();
metadata.beginQuery(session);
ConnectorTableHandle tableHandle = getTableHandle(metadata, tableName);
List<ColumnHandle> columnHandles = filterNonHiddenColumnHandles(metadata.getColumnHandles(session, tableHandle).values());
MaterializedResult result = readTable(transaction, tableHandle, columnHandles, session, TupleDomain.all(), OptionalInt.empty(), Optional.empty());
assertEqualsIgnoreOrder(result.getMaterializedRows(), resultBuilder.build().getMaterializedRows());
// verify we did not modify the table directory
assertEquals(listAllDataFiles(transaction, tableName.getSchemaName(), tableName.getTableName()), existingFiles);
}
// verify statistics unchanged
try (Transaction transaction = newTransaction()) {
ConnectorSession session = newSession();
HiveBasicStatistics statistics = getBasicStatisticsForTable(session, transaction, tableName);
assertEquals(statistics.getRowCount().getAsLong(), CREATE_TABLE_DATA.getRowCount() * 3L);
assertEquals(statistics.getFileCount().getAsLong(), 3L);
}
}
use of io.prestosql.spi.connector.ConnectorPageSink in project hetu-core by openlookeng.
the class AbstractTestHive method doTestBucketSortedTables.
private void doTestBucketSortedTables(SchemaTableName table) throws IOException {
int bucketCount = 3;
int expectedRowCount = 0;
try (Transaction transaction = newTransaction()) {
ConnectorSession session = newSession();
ConnectorMetadata metadata = transaction.getMetadata();
// begin creating the table
ConnectorTableMetadata tableMetadata = new ConnectorTableMetadata(table, ImmutableList.<ColumnMetadata>builder().add(new ColumnMetadata("id", VARCHAR)).add(new ColumnMetadata("value_asc", VARCHAR)).add(new ColumnMetadata("value_desc", BIGINT)).add(new ColumnMetadata("ds", VARCHAR)).build(), ImmutableMap.<String, Object>builder().put(STORAGE_FORMAT_PROPERTY, RCBINARY).put(PARTITIONED_BY_PROPERTY, ImmutableList.of("ds")).put(BUCKETED_BY_PROPERTY, ImmutableList.of("id")).put(BUCKET_COUNT_PROPERTY, bucketCount).put(SORTED_BY_PROPERTY, ImmutableList.builder().add(new SortingColumn("value_asc", SortingColumn.Order.ASCENDING)).add(new SortingColumn("value_desc", SortingColumn.Order.DESCENDING)).build()).build());
ConnectorOutputTableHandle outputHandle = metadata.beginCreateTable(session, tableMetadata, Optional.empty());
// write the data
ConnectorPageSink sink = pageSinkProvider.createPageSink(transaction.getTransactionHandle(), session, outputHandle);
List<Type> types = tableMetadata.getColumns().stream().map(ColumnMetadata::getType).collect(toList());
ThreadLocalRandom random = ThreadLocalRandom.current();
for (int i = 0; i < 50; i++) {
MaterializedResult.Builder builder = MaterializedResult.resultBuilder(session, types);
for (int j = 0; j < 1000; j++) {
builder.row(sha256().hashLong(random.nextLong()).toString(), "test" + random.nextInt(100), random.nextLong(100_000), "2018-04-01");
expectedRowCount++;
}
sink.appendPage(builder.build().toPage());
}
// verify we have enough temporary files per bucket to require multiple passes
Path stagingPathRoot = getStagingPathRoot(outputHandle);
HdfsContext context = new HdfsContext(session, table.getSchemaName(), table.getTableName());
assertThat(listAllDataFiles(context, stagingPathRoot)).filteredOn(file -> file.contains(".tmp-sort.")).size().isGreaterThan(bucketCount * getHiveConfig().getMaxOpenSortFiles() * 2);
// finish the write
Collection<Slice> fragments = getFutureValue(sink.finish());
// verify there are no temporary files
for (String file : listAllDataFiles(context, stagingPathRoot)) {
assertThat(file).doesNotContain(".tmp-sort.");
}
// finish creating table
metadata.finishCreateTable(session, outputHandle, fragments, ImmutableList.of());
transaction.commit();
}
// verify that bucket files are sorted
try (Transaction transaction = newTransaction()) {
ConnectorMetadata metadata = transaction.getMetadata();
ConnectorSession session = newSession();
metadata.beginQuery(session);
ConnectorTableHandle tableHandle = getTableHandle(metadata, table);
List<ColumnHandle> columnHandles = ImmutableList.copyOf(metadata.getColumnHandles(session, tableHandle).values());
List<ConnectorSplit> splits = getAllSplits(tableHandle, transaction, session);
assertThat(splits).hasSize(bucketCount);
int actualRowCount = 0;
for (ConnectorSplit split : splits) {
try (ConnectorPageSource pageSource = pageSourceProvider.createPageSource(transaction.getTransactionHandle(), session, split, tableHandle, columnHandles)) {
String lastValueAsc = null;
long lastValueDesc = -1;
while (!pageSource.isFinished()) {
Page page = pageSource.getNextPage();
if (page == null) {
continue;
}
for (int i = 0; i < page.getPositionCount(); i++) {
Block blockAsc = page.getBlock(1);
Block blockDesc = page.getBlock(2);
assertFalse(blockAsc.isNull(i));
assertFalse(blockDesc.isNull(i));
String valueAsc = VARCHAR.getSlice(blockAsc, i).toStringUtf8();
if (lastValueAsc != null) {
assertGreaterThanOrEqual(valueAsc, lastValueAsc);
if (valueAsc.equals(lastValueAsc)) {
long valueDesc = BIGINT.getLong(blockDesc, i);
if (lastValueDesc != -1) {
assertLessThanOrEqual(valueDesc, lastValueDesc);
}
lastValueDesc = valueDesc;
} else {
lastValueDesc = -1;
}
}
lastValueAsc = valueAsc;
actualRowCount++;
}
}
}
}
assertThat(actualRowCount).isEqualTo(expectedRowCount);
}
}
use of io.prestosql.spi.connector.ConnectorPageSink in project hetu-core by openlookeng.
the class AbstractTestHive method doTestTransactionDeleteInsert.
private void doTestTransactionDeleteInsert(HiveStorageFormat storageFormat, SchemaTableName tableName, Domain domainToDrop, MaterializedResult insertData, MaterializedResult expectedData, TransactionDeleteInsertTestTag tag, boolean expectQuerySucceed, Optional<ConflictTrigger> conflictTrigger) throws Exception {
Path writePath = null;
Path targetPath = null;
try (Transaction transaction = newTransaction()) {
try {
ConnectorMetadata metadata = transaction.getMetadata();
ConnectorTableHandle tableHandle = getTableHandle(metadata, tableName);
ConnectorSession session;
rollbackIfEquals(tag, ROLLBACK_RIGHT_AWAY);
// Query 1: delete
session = newSession();
HiveColumnHandle dsColumnHandle = (HiveColumnHandle) metadata.getColumnHandles(session, tableHandle).get("pk2");
TupleDomain<ColumnHandle> tupleDomain = TupleDomain.withColumnDomains(ImmutableMap.of(dsColumnHandle, domainToDrop));
Constraint constraint = new Constraint(tupleDomain, convertToPredicate(tupleDomain));
tableHandle = applyFilter(metadata, tableHandle, constraint);
tableHandle = metadata.applyDelete(session, tableHandle).get();
metadata.executeDelete(session, tableHandle);
rollbackIfEquals(tag, ROLLBACK_AFTER_DELETE);
// Query 2: insert
session = newSession();
ConnectorInsertTableHandle insertTableHandle = metadata.beginInsert(session, tableHandle);
rollbackIfEquals(tag, ROLLBACK_AFTER_BEGIN_INSERT);
writePath = getStagingPathRoot(insertTableHandle);
targetPath = getTargetPathRoot(insertTableHandle);
ConnectorPageSink sink = pageSinkProvider.createPageSink(transaction.getTransactionHandle(), session, insertTableHandle);
sink.appendPage(insertData.toPage());
rollbackIfEquals(tag, ROLLBACK_AFTER_APPEND_PAGE);
Collection<Slice> fragments = getFutureValue(sink.finish());
rollbackIfEquals(tag, ROLLBACK_AFTER_SINK_FINISH);
metadata.finishInsert(session, insertTableHandle, fragments, ImmutableList.of());
rollbackIfEquals(tag, ROLLBACK_AFTER_FINISH_INSERT);
assertEquals(tag, COMMIT);
if (conflictTrigger.isPresent()) {
JsonCodec<PartitionUpdate> partitionUpdateCodec = JsonCodec.jsonCodec(PartitionUpdate.class);
List<PartitionUpdate> partitionUpdates = fragments.stream().map(Slice::getBytes).map(partitionUpdateCodec::fromJson).collect(toList());
conflictTrigger.get().triggerConflict(session, tableName, insertTableHandle, partitionUpdates);
}
transaction.commit();
if (conflictTrigger.isPresent()) {
assertTrue(expectQuerySucceed);
conflictTrigger.get().verifyAndCleanup(session, tableName);
}
} catch (TestingRollbackException e) {
transaction.rollback();
} catch (PrestoException e) {
assertFalse(expectQuerySucceed);
if (conflictTrigger.isPresent()) {
conflictTrigger.get().verifyAndCleanup(newSession(), tableName);
}
}
}
// check that temporary files are removed
if (writePath != null && !writePath.equals(targetPath)) {
HdfsContext context = new HdfsContext(newSession(), tableName.getSchemaName(), tableName.getTableName());
FileSystem fileSystem = hdfsEnvironment.getFileSystem(context, writePath);
assertFalse(fileSystem.exists(writePath));
}
try (Transaction transaction = newTransaction()) {
// verify partitions
List<String> partitionNames = transaction.getMetastore(tableName.getSchemaName()).getPartitionNames(new HiveIdentity(newSession()), tableName.getSchemaName(), tableName.getTableName()).orElseThrow(() -> new AssertionError("Table does not exist: " + tableName));
assertEqualsIgnoreOrder(partitionNames, expectedData.getMaterializedRows().stream().map(row -> format("pk1=%s/pk2=%s", row.getField(1), row.getField(2))).distinct().collect(toList()));
// load the new table
ConnectorSession session = newSession();
ConnectorMetadata metadata = transaction.getMetadata();
metadata.beginQuery(session);
ConnectorTableHandle tableHandle = getTableHandle(metadata, tableName);
List<ColumnHandle> columnHandles = filterNonHiddenColumnHandles(metadata.getColumnHandles(session, tableHandle).values());
// verify the data
MaterializedResult result = readTable(transaction, tableHandle, columnHandles, session, TupleDomain.all(), OptionalInt.empty(), Optional.of(storageFormat));
assertEqualsIgnoreOrder(result.getMaterializedRows(), expectedData.getMaterializedRows());
}
}
Aggregations