use of com.facebook.presto.spi.ColumnHandle in project presto by prestodb.
In class AbstractTestHiveFileSystem, method testGetRecords:
@Test
public void testGetRecords() throws Exception {
    try (Transaction transaction = newTransaction()) {
        ConnectorMetadata metadata = transaction.getMetadata();
        ConnectorSession session = newSession();
        ConnectorTableHandle table = getTableHandle(metadata, this.table);
        List<ColumnHandle> columnHandles = ImmutableList.copyOf(metadata.getColumnHandles(session, table).values());
        Map<String, Integer> columnIndex = indexColumns(columnHandles);
        List<ConnectorTableLayoutResult> tableLayoutResults = metadata.getTableLayouts(session, table, Constraint.alwaysTrue(), Optional.empty());
        HiveTableLayoutHandle layoutHandle = (HiveTableLayoutHandle) getOnlyElement(tableLayoutResults).getTableLayout().getHandle();
        assertEquals(layoutHandle.getPartitions().get().size(), 1);
        ConnectorSplitSource splitSource = splitManager.getSplits(transaction.getTransactionHandle(), session, layoutHandle, SPLIT_SCHEDULING_CONTEXT);
        TableHandle tableHandle = new TableHandle(new ConnectorId(database), table, transaction.getTransactionHandle(), Optional.of(layoutHandle));
        long sum = 0;
        for (ConnectorSplit split : getAllSplits(splitSource)) {
            try (ConnectorPageSource pageSource = pageSourceProvider.createPageSource(transaction.getTransactionHandle(), session, split, tableHandle.getLayout().get(), columnHandles, NON_CACHEABLE)) {
                MaterializedResult result = materializeSourceDataStream(session, pageSource, getTypes(columnHandles));
                for (MaterializedRow row : result) {
                    sum += (Long) row.getField(columnIndex.get("t_bigint"));
                }
            }
        }
        // The test table consists of multiple S3 objects holding the same data in different
        // compression formats: uncompressed | .gz | .lz4 | .bz2
        assertEquals(sum, 78300 * 4);
    }
}
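The loop above drains getAllSplits(splitSource) before reading any pages. The helper itself is not shown in this snippet; a minimal sketch of such a split-draining helper, assuming the standard ConnectorSplitSource.getNextBatch API, the NOT_PARTITIONED handle, and an arbitrary batch size of 1000, could look like this:

private static List<ConnectorSplit> getAllSplits(ConnectorSplitSource splitSource)
{
    // Drain the split source in batches until it reports completion.
    ImmutableList.Builder<ConnectorSplit> splits = ImmutableList.builder();
    while (!splitSource.isFinished()) {
        splits.addAll(getFutureValue(splitSource.getNextBatch(NOT_PARTITIONED, 1000)).getSplits());
    }
    return splits.build();
}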
use of com.facebook.presto.spi.ColumnHandle in project presto by prestodb.
In class AbstractTestHiveFileSystem, method createTable:
private void createTable(MetastoreContext metastoreContext, SchemaTableName tableName, HiveStorageFormat storageFormat) throws Exception {
    List<ColumnMetadata> columns = ImmutableList.<ColumnMetadata>builder().add(new ColumnMetadata("id", BIGINT)).build();
    MaterializedResult data = MaterializedResult.resultBuilder(newSession(), BIGINT).row(1L).row(3L).row(2L).build();
    try (Transaction transaction = newTransaction()) {
        ConnectorMetadata metadata = transaction.getMetadata();
        ConnectorSession session = newSession();
        // begin creating the table
        ConnectorTableMetadata tableMetadata = new ConnectorTableMetadata(tableName, columns, createTableProperties(storageFormat));
        ConnectorOutputTableHandle outputHandle = metadata.beginCreateTable(session, tableMetadata, Optional.empty());
        // write the records
        ConnectorPageSink sink = pageSinkProvider.createPageSink(transaction.getTransactionHandle(), session, outputHandle, TEST_HIVE_PAGE_SINK_CONTEXT);
        sink.appendPage(data.toPage());
        Collection<Slice> fragments = getFutureValue(sink.finish());
        // commit the table
        metadata.finishCreateTable(session, outputHandle, fragments, ImmutableList.of());
        transaction.commit();
        // Hack to work around the metastore not being configured for S3 (or another
        // non-default file system). The metastore tries to validate the location when
        // creating the table, which fails without explicit file system configuration.
        // We work around that by using a dummy location when creating the table and
        // update it here to the correct location.
        metastoreClient.updateTableLocation(metastoreContext, database, tableName.getTableName(), locationService.getTableWriteInfo(((HiveOutputTableHandle) outputHandle).getLocationHandle()).getTargetPath().toString());
    }
    try (Transaction transaction = newTransaction()) {
        ConnectorMetadata metadata = transaction.getMetadata();
        ConnectorSession session = newSession();
        // load the new table
        ConnectorTableHandle hiveTableHandle = getTableHandle(metadata, tableName);
        List<ColumnHandle> columnHandles = filterNonHiddenColumnHandles(metadata.getColumnHandles(session, hiveTableHandle).values());
        // verify the metadata
        ConnectorTableMetadata tableMetadata = metadata.getTableMetadata(session, getTableHandle(metadata, tableName));
        assertEquals(filterNonHiddenColumnMetadata(tableMetadata.getColumns()), columns);
        // verify the data
        List<ConnectorTableLayoutResult> tableLayoutResults = metadata.getTableLayouts(session, hiveTableHandle, Constraint.alwaysTrue(), Optional.empty());
        HiveTableLayoutHandle layoutHandle = (HiveTableLayoutHandle) getOnlyElement(tableLayoutResults).getTableLayout().getHandle();
        assertEquals(layoutHandle.getPartitions().get().size(), 1);
        ConnectorSplitSource splitSource = splitManager.getSplits(transaction.getTransactionHandle(), session, layoutHandle, SPLIT_SCHEDULING_CONTEXT);
        ConnectorSplit split = getOnlyElement(getAllSplits(splitSource));
        TableHandle tableHandle = new TableHandle(new ConnectorId("hive"), hiveTableHandle, transaction.getTransactionHandle(), Optional.of(layoutHandle));
        try (ConnectorPageSource pageSource = pageSourceProvider.createPageSource(transaction.getTransactionHandle(), session, split, tableHandle.getLayout().get(), columnHandles, NON_CACHEABLE)) {
            MaterializedResult result = materializeSourceDataStream(session, pageSource, getTypes(columnHandles));
            assertEqualsIgnoreOrder(result.getMaterializedRows(), data.getMaterializedRows());
        }
    }
}
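createTableProperties(storageFormat) supplies the connector table properties for the new table; the helper is defined elsewhere in the test base class. A hedged sketch of what it plausibly returns, using the same property constants that appear explicitly in doTestBucketSortedTables below (the specific defaults here are assumptions, not taken from the source):

private static Map<String, Object> createTableProperties(HiveStorageFormat storageFormat)
{
    // Assumed defaults: unpartitioned, unbucketed, unsorted; only the storage format varies per test.
    return ImmutableMap.<String, Object>builder()
            .put(STORAGE_FORMAT_PROPERTY, storageFormat)
            .put(PARTITIONED_BY_PROPERTY, ImmutableList.of())
            .put(BUCKETED_BY_PROPERTY, ImmutableList.of())
            .put(BUCKET_COUNT_PROPERTY, 0)
            .put(SORTED_BY_PROPERTY, ImmutableList.of())
            .build();
}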
use of com.facebook.presto.spi.ColumnHandle in project presto by prestodb.
In class MetastoreHiveStatisticsProvider, method getTableStatistics:
private static TableStatistics getTableStatistics(Map<String, ColumnHandle> columns, Map<String, Type> columnTypes, List<HivePartition> partitions, Map<String, PartitionStatistics> statistics) {
    if (statistics.isEmpty()) {
        return TableStatistics.empty();
    }
    checkArgument(!partitions.isEmpty(), "partitions is empty");
    OptionalDouble optionalAverageRowsPerPartition = calculateAverageRowsPerPartition(statistics.values());
    if (!optionalAverageRowsPerPartition.isPresent()) {
        return TableStatistics.empty();
    }
    double averageRowsPerPartition = optionalAverageRowsPerPartition.getAsDouble();
    verify(averageRowsPerPartition >= 0, "averageRowsPerPartition must be greater than or equal to zero");
    int queriedPartitionsCount = partitions.size();
    double rowCount = averageRowsPerPartition * queriedPartitionsCount;
    TableStatistics.Builder result = TableStatistics.builder();
    result.setRowCount(Estimate.of(rowCount));
    OptionalDouble optionalAverageSizePerPartition = calculateAverageSizePerPartition(statistics.values());
    if (optionalAverageSizePerPartition.isPresent()) {
        double averageSizePerPartition = optionalAverageSizePerPartition.getAsDouble();
        verify(averageSizePerPartition >= 0, "averageSizePerPartition must be greater than or equal to zero: %s", averageSizePerPartition);
        double totalSize = averageSizePerPartition * queriedPartitionsCount;
        result.setTotalSize(Estimate.of(totalSize));
    }
    for (Map.Entry<String, ColumnHandle> column : columns.entrySet()) {
        String columnName = column.getKey();
        HiveColumnHandle columnHandle = (HiveColumnHandle) column.getValue();
        Type columnType = columnTypes.get(columnName);
        ColumnStatistics columnStatistics;
        if (columnHandle.isPartitionKey()) {
            columnStatistics = createPartitionColumnStatistics(columnHandle, columnType, partitions, statistics, averageRowsPerPartition, rowCount);
        } else {
            columnStatistics = createDataColumnStatistics(columnName, columnType, rowCount, statistics.values());
        }
        result.setColumnStatistics(columnHandle, columnStatistics);
    }
    return result.build();
}
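calculateAverageRowsPerPartition(statistics.values()) is the gatekeeper here: when no partition reports a row count, it returns an empty OptionalDouble and the table statistics stay empty. A plausible sketch, assuming the method averages over only those partitions whose basic statistics carry a row count (the size variant used a few lines later would follow the same pattern over the partitions' data size):

private static OptionalDouble calculateAverageRowsPerPartition(Collection<PartitionStatistics> statistics)
{
    // Average only the partitions that actually report a row count;
    // an empty result signals that no estimate can be derived.
    return statistics.stream()
            .map(PartitionStatistics::getBasicStatistics)
            .map(HiveBasicStatistics::getRowCount)
            .filter(OptionalLong::isPresent)
            .mapToLong(OptionalLong::getAsLong)
            .peek(rowCount -> verify(rowCount >= 0, "rowCount must be greater than or equal to zero"))
            .average();
}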
use of com.facebook.presto.spi.ColumnHandle in project presto by prestodb.
In class AbstractTestHiveClient, method doTestBucketSortedTables:
private void doTestBucketSortedTables(SchemaTableName table, boolean useTempPath, HiveStorageFormat storageFormat) throws IOException {
    int bucketCount = 3;
    int expectedRowCount = 0;
    try (Transaction transaction = newTransaction()) {
        ConnectorMetadata metadata = transaction.getMetadata();
        ConnectorSession session = newSession(ImmutableMap.of(SORTED_WRITE_TO_TEMP_PATH_ENABLED, useTempPath));
        // begin creating the table
        ConnectorTableMetadata tableMetadata = new ConnectorTableMetadata(
                table,
                ImmutableList.<ColumnMetadata>builder()
                        .add(new ColumnMetadata("id", VARCHAR))
                        .add(new ColumnMetadata("value_asc", VARCHAR))
                        .add(new ColumnMetadata("value_desc", BIGINT))
                        .add(new ColumnMetadata("ds", VARCHAR))
                        .build(),
                ImmutableMap.<String, Object>builder()
                        .put(STORAGE_FORMAT_PROPERTY, storageFormat)
                        .put(PARTITIONED_BY_PROPERTY, ImmutableList.of("ds"))
                        .put(BUCKETED_BY_PROPERTY, ImmutableList.of("id"))
                        .put(BUCKET_COUNT_PROPERTY, bucketCount)
                        .put(SORTED_BY_PROPERTY, ImmutableList.builder()
                                .add(new SortingColumn("value_asc", SortingColumn.Order.ASCENDING))
                                .add(new SortingColumn("value_desc", SortingColumn.Order.DESCENDING))
                                .build())
                        .build());
        HiveOutputTableHandle outputHandle = (HiveOutputTableHandle) metadata.beginCreateTable(session, tableMetadata, Optional.empty());
        // write the data
        ConnectorPageSink sink = pageSinkProvider.createPageSink(transaction.getTransactionHandle(), session, outputHandle, TEST_HIVE_PAGE_SINK_CONTEXT);
        List<Type> types = tableMetadata.getColumns().stream().map(ColumnMetadata::getType).collect(toList());
        ThreadLocalRandom random = ThreadLocalRandom.current();
        for (int i = 0; i < 50; i++) {
            MaterializedResult.Builder builder = MaterializedResult.resultBuilder(session, types);
            for (int j = 0; j < 1000; j++) {
                builder.row(sha256().hashLong(random.nextLong()).toString(), "test" + random.nextInt(100), random.nextLong(100_000), "2018-04-01");
                expectedRowCount++;
            }
            sink.appendPage(builder.build().toPage());
        }
        // verify we have enough temporary files per bucket to require multiple passes
        Path path = useTempPath ? getTempFilePathRoot(outputHandle).get() : getStagingPathRoot(outputHandle);
        HdfsContext context = new HdfsContext(session, table.getSchemaName(), table.getTableName(), outputHandle.getLocationHandle().getTargetPath().toString(), true);
        Set<String> files = listAllDataFiles(context, path);
        assertThat(files).filteredOn(file -> file.contains(".tmp-sort")).size().isGreaterThan(bucketCount * getHiveClientConfig().getMaxOpenSortFiles() * 2);
        // finish the write
        Collection<Slice> fragments = getFutureValue(sink.finish());
        // verify there are no temporary files
        for (String file : listAllDataFiles(context, path)) {
            assertThat(file).doesNotContain(".tmp-sort.");
        }
        // finish creating table
        metadata.finishCreateTable(session, outputHandle, fragments, ImmutableList.of());
        transaction.commit();
    }
    // verify that bucket files are sorted
    try (Transaction transaction = newTransaction()) {
        ConnectorMetadata metadata = transaction.getMetadata();
        ConnectorSession session = newSession(ImmutableMap.of(SORTED_WRITE_TO_TEMP_PATH_ENABLED, useTempPath));
        ConnectorTableHandle hiveTableHandle = getTableHandle(metadata, table);
        List<ColumnHandle> columnHandles = ImmutableList.copyOf(metadata.getColumnHandles(session, hiveTableHandle).values());
        ConnectorTableLayoutHandle layoutHandle = getLayout(session, transaction, hiveTableHandle, TupleDomain.all());
        List<ConnectorSplit> splits = getAllSplits(session, transaction, layoutHandle);
        assertThat(splits).hasSize(bucketCount);
        int actualRowCount = 0;
        for (ConnectorSplit split : splits) {
            try (ConnectorPageSource pageSource = pageSourceProvider.createPageSource(transaction.getTransactionHandle(), session, split, layoutHandle, columnHandles, NON_CACHEABLE)) {
                String lastValueAsc = null;
                long lastValueDesc = -1;
                while (!pageSource.isFinished()) {
                    Page page = pageSource.getNextPage();
                    if (page == null) {
                        continue;
                    }
                    for (int i = 0; i < page.getPositionCount(); i++) {
                        Block blockAsc = page.getBlock(1);
                        Block blockDesc = page.getBlock(2);
                        assertFalse(blockAsc.isNull(i));
                        assertFalse(blockDesc.isNull(i));
                        String valueAsc = VARCHAR.getSlice(blockAsc, i).toStringUtf8();
                        if (lastValueAsc != null) {
                            assertGreaterThanOrEqual(valueAsc, lastValueAsc);
                            if (valueAsc.equals(lastValueAsc)) {
                                long valueDesc = BIGINT.getLong(blockDesc, i);
                                if (lastValueDesc != -1) {
                                    assertLessThanOrEqual(valueDesc, lastValueDesc);
                                }
                                lastValueDesc = valueDesc;
                            } else {
                                lastValueDesc = -1;
                            }
                        }
                        lastValueAsc = valueAsc;
                        actualRowCount++;
                    }
                }
            }
        }
        assertThat(actualRowCount).isEqualTo(expectedRowCount);
    }
}
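The verification pass calls a getAllSplits(session, transaction, layoutHandle) overload rather than the split-source variant used in testGetRecords. Assuming it merely opens the split source through the same splitManager and drains it with the single-argument helper sketched earlier, a minimal sketch:

private List<ConnectorSplit> getAllSplits(ConnectorSession session, Transaction transaction, ConnectorTableLayoutHandle layoutHandle)
{
    // Open the split source for the layout, then drain it with the single-argument helper.
    ConnectorSplitSource splitSource = splitManager.getSplits(transaction.getTransactionHandle(), session, layoutHandle, SPLIT_SCHEDULING_CONTEXT);
    return getAllSplits(splitSource);
}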
use of com.facebook.presto.spi.ColumnHandle in project presto by prestodb.
In class AbstractTestHiveClient, method indexColumns:
protected static ImmutableMap<String, Integer> indexColumns(List<ColumnHandle> columnHandles) {
    ImmutableMap.Builder<String, Integer> index = ImmutableMap.builder();
    int i = 0;
    for (ColumnHandle columnHandle : columnHandles) {
        HiveColumnHandle hiveColumnHandle = (HiveColumnHandle) columnHandle;
        index.put(hiveColumnHandle.getName(), i);
        i++;
    }
    return index.build();
}
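A short illustrative usage, mirroring how testGetRecords above reads "t_bigint" (the result and columnHandles variables are assumed to come from a materialized page source as in that test):

Map<String, Integer> columnIndex = indexColumns(columnHandles);
int position = columnIndex.get("t_bigint");  // resolve the column's position once
long sum = 0;
for (MaterializedRow row : result.getMaterializedRows()) {
    sum += (Long) row.getField(position);    // then read it from each row by position
}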