Use of io.prestosql.spi.connector.ConnectorTableHandle in project hetu-core by openlookeng.
From the class AbstractTestHive, method doInsertIntoExistingPartition:
private void doInsertIntoExistingPartition(HiveStorageFormat storageFormat, SchemaTableName tableName)
        throws Exception
{
    // creating the table
    doCreateEmptyTable(tableName, storageFormat, CREATE_TABLE_COLUMNS_PARTITIONED);

    MaterializedResult.Builder resultBuilder = MaterializedResult.resultBuilder(SESSION, CREATE_TABLE_PARTITIONED_DATA.getTypes());
    for (int i = 0; i < 3; i++) {
        // insert the data
        insertData(tableName, CREATE_TABLE_PARTITIONED_DATA);

        try (Transaction transaction = newTransaction()) {
            ConnectorSession session = newSession();
            ConnectorMetadata metadata = transaction.getMetadata();
            metadata.beginQuery(session);
            ConnectorTableHandle tableHandle = getTableHandle(metadata, tableName);

            // verify partitions were created
            List<String> partitionNames = transaction.getMetastore(tableName.getSchemaName())
                    .getPartitionNames(new HiveIdentity(session), tableName.getSchemaName(), tableName.getTableName())
                    .orElseThrow(() -> new AssertionError("Table does not exist: " + tableName));
            assertEqualsIgnoreOrder(partitionNames, CREATE_TABLE_PARTITIONED_DATA.getMaterializedRows().stream()
                    .map(row -> "ds=" + row.getField(CREATE_TABLE_PARTITIONED_DATA.getTypes().size() - 1))
                    .collect(toList()));

            // load the new table
            List<ColumnHandle> columnHandles = filterNonHiddenColumnHandles(metadata.getColumnHandles(session, tableHandle).values());

            // verify the data
            resultBuilder.rows(CREATE_TABLE_PARTITIONED_DATA.getMaterializedRows());
            MaterializedResult result = readTable(transaction, tableHandle, columnHandles, session, TupleDomain.all(), OptionalInt.empty(), Optional.of(storageFormat));
            assertEqualsIgnoreOrder(result.getMaterializedRows(), resultBuilder.build().getMaterializedRows());

            // test statistics
            for (String partitionName : partitionNames) {
                HiveBasicStatistics statistics = getBasicStatisticsForPartition(session, transaction, tableName, partitionName);
                assertEquals(statistics.getRowCount().getAsLong(), i + 1L);
                assertEquals(statistics.getFileCount().getAsLong(), i + 1L);
                assertGreaterThan(statistics.getInMemoryDataSizeInBytes().getAsLong(), 0L);
                assertGreaterThan(statistics.getOnDiskDataSizeInBytes().getAsLong(), 0L);
            }
        }
    }

    // test rollback
    Set<String> existingFiles;
    Path stagingPathRoot;
    try (Transaction transaction = newTransaction()) {
        ConnectorMetadata metadata = transaction.getMetadata();
        ConnectorSession session = newSession();

        existingFiles = listAllDataFiles(transaction, tableName.getSchemaName(), tableName.getTableName());
        assertFalse(existingFiles.isEmpty());

        ConnectorTableHandle tableHandle = getTableHandle(metadata, tableName);
        metadata.beginQuery(session);

        // "stage" insert data
        ConnectorInsertTableHandle insertTableHandle = metadata.beginInsert(session, tableHandle);
        stagingPathRoot = getStagingPathRoot(insertTableHandle);
        ConnectorPageSink sink = pageSinkProvider.createPageSink(transaction.getTransactionHandle(), session, insertTableHandle);
        sink.appendPage(CREATE_TABLE_PARTITIONED_DATA.toPage());
        sink.appendPage(CREATE_TABLE_PARTITIONED_DATA.toPage());
        Collection<Slice> fragments = getFutureValue(sink.finish());
        metadata.finishInsert(session, insertTableHandle, fragments, ImmutableList.of());

        // verify all temp files start with the unique prefix
        HdfsContext context = new HdfsContext(session, tableName.getSchemaName(), tableName.getTableName());
        Set<String> tempFiles = listAllDataFiles(context, getStagingPathRoot(insertTableHandle));
        assertFalse(tempFiles.isEmpty());
        for (String filePath : tempFiles) {
            assertThat(new Path(filePath).getName()).startsWith(session.getQueryId());
        }

        // verify statistics are visible from within the current transaction
        List<String> partitionNames = transaction.getMetastore(tableName.getSchemaName())
                .getPartitionNames(new HiveIdentity(session), tableName.getSchemaName(), tableName.getTableName())
                .orElseThrow(() -> new AssertionError("Table does not exist: " + tableName));
        for (String partitionName : partitionNames) {
            HiveBasicStatistics partitionStatistics = getBasicStatisticsForPartition(session, transaction, tableName, partitionName);
            assertEquals(partitionStatistics.getRowCount().getAsLong(), 5L);
        }

        // rollback insert
        transaction.rollback();
    }

    try (Transaction transaction = newTransaction()) {
        ConnectorMetadata metadata = transaction.getMetadata();
        ConnectorSession session = newSession();
        metadata.beginQuery(session);
        ConnectorTableHandle tableHandle = getTableHandle(metadata, tableName);
        List<ColumnHandle> columnHandles = filterNonHiddenColumnHandles(metadata.getColumnHandles(session, tableHandle).values());

        // verify the data is unchanged
        MaterializedResult result = readTable(transaction, tableHandle, columnHandles, session, TupleDomain.all(), OptionalInt.empty(), Optional.empty());
        assertEqualsIgnoreOrder(result.getMaterializedRows(), resultBuilder.build().getMaterializedRows());

        // verify we did not modify the table directory
        assertEquals(listAllDataFiles(transaction, tableName.getSchemaName(), tableName.getTableName()), existingFiles);

        // verify temp directory is empty
        HdfsContext hdfsContext = new HdfsContext(session, tableName.getSchemaName(), tableName.getTableName());
        assertTrue(listAllDataFiles(hdfsContext, stagingPathRoot).isEmpty());

        // verify statistics have been rolled back
        HiveIdentity identity = new HiveIdentity(session);
        List<String> partitionNames = transaction.getMetastore(tableName.getSchemaName())
                .getPartitionNames(identity, tableName.getSchemaName(), tableName.getTableName())
                .orElseThrow(() -> new AssertionError("Table does not exist: " + tableName));
        for (String partitionName : partitionNames) {
            HiveBasicStatistics partitionStatistics = getBasicStatisticsForPartition(session, transaction, tableName, partitionName);
            assertEquals(partitionStatistics.getRowCount().getAsLong(), 3L);
        }
    }
}
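The test drives the full staged-insert lifecycle: beginInsert stages files under a query-scoped staging path, finishInsert publishes them, and Transaction.rollback discards them. Stripped of the test harness, the commit path reduces to the sketch below. This is a minimal sketch, not the harness itself: the metadata, session, transactionHandle, pageSinkProvider, and page parameters are assumed to be supplied by the caller, and getFutureValue is io.airlift.concurrent.MoreFutures.getFutureValue.

private static void insertPage(
        ConnectorMetadata metadata,
        ConnectorSession session,
        ConnectorTransactionHandle transactionHandle,
        ConnectorPageSinkProvider pageSinkProvider,
        ConnectorTableHandle tableHandle,
        Page page)
{
    // the connector returns a handle describing where staged files will land
    ConnectorInsertTableHandle insertHandle = metadata.beginInsert(session, tableHandle);

    // write rows through a page sink; data goes to the staging location first
    ConnectorPageSink sink = pageSinkProvider.createPageSink(transactionHandle, session, insertHandle);
    sink.appendPage(page);

    // finish() yields opaque fragments identifying the staged files
    Collection<Slice> fragments = getFutureValue(sink.finish());

    // finishInsert moves the staged files into the table and records new
    // partitions; abandoning the transaction instead (as the rollback half of
    // the test does) leaves the table untouched
    metadata.finishInsert(session, insertHandle, fragments, ImmutableList.of());
}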
Use of io.prestosql.spi.connector.ConnectorTableHandle in project hetu-core by openlookeng.
From the class AbstractTestHive, method assertTableStatsComputed:
private void assertTableStatsComputed(SchemaTableName tableName, Set<String> expectedColumnStatsColumns)
{
    try (Transaction transaction = newTransaction()) {
        ConnectorMetadata metadata = transaction.getMetadata();
        ConnectorSession session = newSession();
        ConnectorTableHandle tableHandle = getTableHandle(metadata, tableName);
        TableStatistics tableStatistics = metadata.getTableStatistics(session, tableHandle, Constraint.alwaysTrue(), true);

        assertFalse(tableStatistics.getRowCount().isUnknown(), "row count is unknown");

        Map<String, ColumnStatistics> columnsStatistics = tableStatistics.getColumnStatistics().entrySet().stream()
                .collect(toImmutableMap(entry -> ((HiveColumnHandle) entry.getKey()).getName(), Map.Entry::getValue));
        assertEquals(columnsStatistics.keySet(), expectedColumnStatsColumns, "columns with statistics");

        Map<String, ColumnHandle> columnHandles = metadata.getColumnHandles(session, tableHandle);
        columnsStatistics.forEach((columnName, columnStatistics) -> {
            ColumnHandle columnHandle = columnHandles.get(columnName);
            Type columnType = metadata.getColumnMetadata(session, tableHandle, columnHandle).getType();

            assertFalse(columnStatistics.getNullsFraction().isUnknown(), "unknown nulls fraction for " + columnName);
            assertFalse(columnStatistics.getDistinctValuesCount().isUnknown(), "unknown distinct values count for " + columnName);
            if (isVarcharType(columnType)) {
                assertFalse(columnStatistics.getDataSize().isUnknown(), "unknown data size for " + columnName);
            }
            else {
                assertTrue(columnStatistics.getDataSize().isUnknown(), "unknown data size for " + columnName);
            }
        });
    }
}
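For reference, a condensed sketch of the same statistics lookup, usable outside the test harness. The trailing boolean on getTableStatistics (include column statistics) is hetu-core's extension of the stock io.prestosql SPI method, as the test's own call shows; the printStatistics helper name is an illustration, not project code.

private static void printStatistics(ConnectorMetadata metadata, ConnectorSession session, ConnectorTableHandle tableHandle)
{
    TableStatistics stats = metadata.getTableStatistics(session, tableHandle, Constraint.alwaysTrue(), true);

    // row count and per-column values are Estimates: check isUnknown() before use
    Estimate rowCount = stats.getRowCount();
    System.out.println("rows: " + (rowCount.isUnknown() ? "unknown" : rowCount.getValue()));

    stats.getColumnStatistics().forEach((handle, columnStats) ->
            System.out.println(((HiveColumnHandle) handle).getName()
                    + " nullsFraction=" + columnStats.getNullsFraction().getValue()
                    + " distinctValues=" + columnStats.getDistinctValuesCount().getValue()));
}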
Use of io.prestosql.spi.connector.ConnectorTableHandle in project hetu-core by openlookeng.
From the class AbstractTestHive, method testGetRecords:
@Test
public void testGetRecords()
        throws Exception
{
    try (Transaction transaction = newTransaction()) {
        ConnectorMetadata metadata = transaction.getMetadata();
        ConnectorSession session = newSession();
        metadata.beginQuery(session);

        ConnectorTableHandle tableHandle = getTableHandle(metadata, tablePartitionFormat);
        ConnectorTableMetadata tableMetadata = metadata.getTableMetadata(session, tableHandle);
        List<ColumnHandle> columnHandles = ImmutableList.copyOf(metadata.getColumnHandles(session, tableHandle).values());
        Map<String, Integer> columnIndex = indexColumns(columnHandles);

        List<ConnectorSplit> splits = getAllSplits(tableHandle, transaction, session);
        assertEquals(splits.size(), tablePartitionFormatPartitions.size());

        for (ConnectorSplit split : splits) {
            HiveSplit hiveSplit = getOnlyElement(((HiveSplitWrapper) split).getSplits());

            List<HivePartitionKey> partitionKeys = hiveSplit.getPartitionKeys();
            String ds = partitionKeys.get(0).getValue();
            String fileFormat = partitionKeys.get(1).getValue();
            HiveStorageFormat fileType = HiveStorageFormat.valueOf(fileFormat.toUpperCase(ENGLISH));
            int dummyPartition = Integer.parseInt(partitionKeys.get(2).getValue());

            long rowNumber = 0;
            long completedBytes = 0;
            try (ConnectorPageSource pageSource = pageSourceProvider.createPageSource(transaction.getTransactionHandle(), session, split, tableHandle, columnHandles)) {
                MaterializedResult result = materializeSourceDataStream(session, pageSource, getTypes(columnHandles));
                assertPageSourceType(pageSource, fileType);

                for (MaterializedRow row : result) {
                    try {
                        assertValueTypes(row, tableMetadata.getColumns());
                    }
                    catch (RuntimeException e) {
                        throw new RuntimeException("row " + rowNumber, e);
                    }

                    rowNumber++;
                    Object value;

                    value = row.getField(columnIndex.get("t_string"));
                    if (rowNumber % 19 == 0) {
                        assertNull(value);
                    }
                    else if (rowNumber % 19 == 1) {
                        assertEquals(value, "");
                    }
                    else {
                        assertEquals(value, "test");
                    }

                    assertEquals(row.getField(columnIndex.get("t_tinyint")), (byte) (1 + rowNumber));
                    assertEquals(row.getField(columnIndex.get("t_smallint")), (short) (2 + rowNumber));
                    assertEquals(row.getField(columnIndex.get("t_int")), 3 + (int) rowNumber);

                    if (rowNumber % 13 == 0) {
                        assertNull(row.getField(columnIndex.get("t_bigint")));
                    }
                    else {
                        assertEquals(row.getField(columnIndex.get("t_bigint")), 4 + rowNumber);
                    }

                    assertEquals((Float) row.getField(columnIndex.get("t_float")), 5.1f + rowNumber, 0.001);
                    assertEquals(row.getField(columnIndex.get("t_double")), 6.2 + rowNumber);

                    if (rowNumber % 3 == 2) {
                        assertNull(row.getField(columnIndex.get("t_boolean")));
                    }
                    else {
                        assertEquals(row.getField(columnIndex.get("t_boolean")), rowNumber % 3 != 0);
                    }

                    assertEquals(row.getField(columnIndex.get("ds")), ds);
                    assertEquals(row.getField(columnIndex.get("file_format")), fileFormat);
                    assertEquals(row.getField(columnIndex.get("dummy")), dummyPartition);

                    long newCompletedBytes = pageSource.getCompletedBytes();
                    assertTrue(newCompletedBytes >= completedBytes);
                    assertTrue(newCompletedBytes <= hiveSplit.getLength());
                    completedBytes = newCompletedBytes;
                }

                assertTrue(completedBytes <= hiveSplit.getLength());
                assertEquals(rowNumber, 100);
            }
        }
    }
}
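materializeSourceDataStream above is a test utility; reading a split directly from a ConnectorPageSource is just a drain loop over getNextPage(). A minimal sketch, assuming the source was obtained from pageSourceProvider.createPageSource as in the test:

private static long countRows(ConnectorPageSource pageSource)
        throws IOException
{
    long rows = 0;
    while (!pageSource.isFinished()) {
        Page page = pageSource.getNextPage();
        // a page source may legitimately return null before more data is ready
        if (page != null) {
            rows += page.getPositionCount();
        }
    }
    pageSource.close();
    return rows;
}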
Use of io.prestosql.spi.connector.ConnectorTableHandle in project hetu-core by openlookeng.
From the class AbstractTestHive, method testGetPartialRecords:
@Test
public void testGetPartialRecords()
        throws Exception
{
    try (Transaction transaction = newTransaction()) {
        ConnectorMetadata metadata = transaction.getMetadata();
        ConnectorSession session = newSession();
        metadata.beginQuery(session);

        ConnectorTableHandle tableHandle = getTableHandle(metadata, tablePartitionFormat);
        List<ColumnHandle> columnHandles = ImmutableList.copyOf(metadata.getColumnHandles(session, tableHandle).values());
        Map<String, Integer> columnIndex = indexColumns(columnHandles);

        List<ConnectorSplit> splits = getAllSplits(tableHandle, transaction, session);
        assertEquals(splits.size(), tablePartitionFormatPartitions.size());

        for (ConnectorSplit split : splits) {
            HiveSplit hiveSplit = HiveSplitWrapper.getOnlyHiveSplit(split);

            List<HivePartitionKey> partitionKeys = hiveSplit.getPartitionKeys();
            String ds = partitionKeys.get(0).getValue();
            String fileFormat = partitionKeys.get(1).getValue();
            HiveStorageFormat fileType = HiveStorageFormat.valueOf(fileFormat.toUpperCase(ENGLISH));
            int dummyPartition = Integer.parseInt(partitionKeys.get(2).getValue());

            long rowNumber = 0;
            try (ConnectorPageSource pageSource = pageSourceProvider.createPageSource(transaction.getTransactionHandle(), session, split, tableHandle, columnHandles)) {
                assertPageSourceType(pageSource, fileType);
                MaterializedResult result = materializeSourceDataStream(session, pageSource, getTypes(columnHandles));
                for (MaterializedRow row : result) {
                    rowNumber++;
                    assertEquals(row.getField(columnIndex.get("t_double")), 6.2 + rowNumber);
                    assertEquals(row.getField(columnIndex.get("ds")), ds);
                    assertEquals(row.getField(columnIndex.get("file_format")), fileFormat);
                    assertEquals(row.getField(columnIndex.get("dummy")), dummyPartition);
                }
            }
            assertEquals(rowNumber, 100);
        }
    }
}
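The columnIndex map used by both read tests comes from the harness's indexColumns helper, which maps column names to field positions in the handle list. A minimal equivalent, assuming every handle is a HiveColumnHandle (as it is in these Hive tests):

private static Map<String, Integer> indexColumnsByName(List<ColumnHandle> columnHandles)
{
    ImmutableMap.Builder<String, Integer> index = ImmutableMap.builder();
    for (int i = 0; i < columnHandles.size(); i++) {
        // HiveColumnHandle exposes the column name; the position is the field
        // index later passed to MaterializedRow.getField
        index.put(((HiveColumnHandle) columnHandles.get(i)).getName(), i);
    }
    return index.build();
}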
Use of io.prestosql.spi.connector.ConnectorTableHandle in project hetu-core by openlookeng.
From the class AbstractTestHive, method testGetPartitionsWithBindings:
@Test
public void testGetPartitionsWithBindings()
{
    try (Transaction transaction = newTransaction()) {
        ConnectorMetadata metadata = transaction.getMetadata();
        ConnectorTableHandle tableHandle = getTableHandle(metadata, tablePartitionFormat);
        Constraint constraint = new Constraint(TupleDomain.withColumnDomains(ImmutableMap.of(intColumn, Domain.singleValue(BIGINT, 5L))));
        tableHandle = applyFilter(metadata, tableHandle, constraint);
        ConnectorTableProperties properties = metadata.getTableProperties(newSession(), tableHandle);
        assertExpectedTableProperties(properties, tablePartitionFormatProperties);
        assertExpectedPartitions(tableHandle, tablePartitionFormatPartitions);
    }
}
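The applyFilter helper in this test wraps ConnectorMetadata.applyFilter, which narrows a ConnectorTableHandle to the partitions matching a TupleDomain. A sketch of that call under the assumption that this SPI version returns Optional<ConstraintApplicationResult<ConnectorTableHandle>>; BIGINT is io.prestosql.spi.type.BigintType.BIGINT, and the helper name is illustrative:

private static ConnectorTableHandle pushDownEquality(
        ConnectorMetadata metadata,
        ConnectorSession session,
        ConnectorTableHandle tableHandle,
        ColumnHandle column,
        long value)
{
    // a single-value domain on one column, e.g. "dummy = 5"
    Constraint constraint = new Constraint(TupleDomain.withColumnDomains(ImmutableMap.of(column, Domain.singleValue(BIGINT, value))));

    // fall back to the original handle when the connector cannot push the filter down
    return metadata.applyFilter(session, tableHandle, constraint)
            .map(ConstraintApplicationResult::getHandle)
            .orElse(tableHandle);
}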