Search in sources :

Example 81 with ColumnHandle

use of com.facebook.presto.spi.ColumnHandle in project presto by prestodb.

the class AbstractTestHiveFileSystem method testGetRecords.

@Test
public void testGetRecords() throws Exception {
    try (Transaction transaction = newTransaction()) {
        ConnectorMetadata metadata = transaction.getMetadata();
        ConnectorSession session = newSession();
        ConnectorTableHandle table = getTableHandle(metadata, this.table);
        List<ColumnHandle> columnHandles = ImmutableList.copyOf(metadata.getColumnHandles(session, table).values());
        Map<String, Integer> columnIndex = indexColumns(columnHandles);
        List<ConnectorTableLayoutResult> tableLayoutResults = metadata.getTableLayouts(session, table, Constraint.alwaysTrue(), Optional.empty());
        HiveTableLayoutHandle layoutHandle = (HiveTableLayoutHandle) getOnlyElement(tableLayoutResults).getTableLayout().getHandle();
        assertEquals(layoutHandle.getPartitions().get().size(), 1);
        ConnectorSplitSource splitSource = splitManager.getSplits(transaction.getTransactionHandle(), session, layoutHandle, SPLIT_SCHEDULING_CONTEXT);
        TableHandle tableHandle = new TableHandle(new ConnectorId(database), table, transaction.getTransactionHandle(), Optional.of(layoutHandle));
        long sum = 0;
        for (ConnectorSplit split : getAllSplits(splitSource)) {
            try (ConnectorPageSource pageSource = pageSourceProvider.createPageSource(transaction.getTransactionHandle(), session, split, tableHandle.getLayout().get(), columnHandles, NON_CACHEABLE)) {
                MaterializedResult result = materializeSourceDataStream(session, pageSource, getTypes(columnHandles));
                for (MaterializedRow row : result) {
                    sum += (Long) row.getField(columnIndex.get("t_bigint"));
                }
            }
        }
        // The test table is made up of multiple S3 objects with same data and different compression codec
        // formats: uncompressed | .gz | .lz4 | .bz2
        assertEquals(sum, 78300 * 4);
    }
}
Also used : ColumnHandle(com.facebook.presto.spi.ColumnHandle) ConnectorSplitSource(com.facebook.presto.spi.ConnectorSplitSource) ConnectorPageSource(com.facebook.presto.spi.ConnectorPageSource) ConnectorTableHandle(com.facebook.presto.spi.ConnectorTableHandle) HiveTransaction(com.facebook.presto.hive.AbstractTestHiveClient.HiveTransaction) Transaction(com.facebook.presto.hive.AbstractTestHiveClient.Transaction) ConnectorTableLayoutResult(com.facebook.presto.spi.ConnectorTableLayoutResult) ConnectorSession(com.facebook.presto.spi.ConnectorSession) TestingConnectorSession(com.facebook.presto.testing.TestingConnectorSession) ConnectorOutputTableHandle(com.facebook.presto.spi.ConnectorOutputTableHandle) TableHandle(com.facebook.presto.spi.TableHandle) ConnectorTableHandle(com.facebook.presto.spi.ConnectorTableHandle) ConnectorMetadata(com.facebook.presto.spi.connector.ConnectorMetadata) MaterializedResult(com.facebook.presto.testing.MaterializedResult) ConnectorSplit(com.facebook.presto.spi.ConnectorSplit) MaterializedRow(com.facebook.presto.testing.MaterializedRow) ConnectorId(com.facebook.presto.spi.ConnectorId) Test(org.testng.annotations.Test)

Example 82 with ColumnHandle

use of com.facebook.presto.spi.ColumnHandle in project presto by prestodb.

the class AbstractTestHiveFileSystem method createTable.

private void createTable(MetastoreContext metastoreContext, SchemaTableName tableName, HiveStorageFormat storageFormat) throws Exception {
    List<ColumnMetadata> columns = ImmutableList.<ColumnMetadata>builder().add(new ColumnMetadata("id", BIGINT)).build();
    MaterializedResult data = MaterializedResult.resultBuilder(newSession(), BIGINT).row(1L).row(3L).row(2L).build();
    try (Transaction transaction = newTransaction()) {
        ConnectorMetadata metadata = transaction.getMetadata();
        ConnectorSession session = newSession();
        // begin creating the table
        ConnectorTableMetadata tableMetadata = new ConnectorTableMetadata(tableName, columns, createTableProperties(storageFormat));
        ConnectorOutputTableHandle outputHandle = metadata.beginCreateTable(session, tableMetadata, Optional.empty());
        // write the records
        ConnectorPageSink sink = pageSinkProvider.createPageSink(transaction.getTransactionHandle(), session, outputHandle, TEST_HIVE_PAGE_SINK_CONTEXT);
        sink.appendPage(data.toPage());
        Collection<Slice> fragments = getFutureValue(sink.finish());
        // commit the table
        metadata.finishCreateTable(session, outputHandle, fragments, ImmutableList.of());
        transaction.commit();
        // Hack to work around the metastore not being configured for S3 or other FS.
        // The metastore tries to validate the location when creating the
        // table, which fails without explicit configuration for file system.
        // We work around that by using a dummy location when creating the
        // table and update it here to the correct location.
        metastoreClient.updateTableLocation(metastoreContext, database, tableName.getTableName(), locationService.getTableWriteInfo(((HiveOutputTableHandle) outputHandle).getLocationHandle()).getTargetPath().toString());
    }
    try (Transaction transaction = newTransaction()) {
        ConnectorMetadata metadata = transaction.getMetadata();
        ConnectorSession session = newSession();
        // load the new table
        ConnectorTableHandle hiveTableHandle = getTableHandle(metadata, tableName);
        List<ColumnHandle> columnHandles = filterNonHiddenColumnHandles(metadata.getColumnHandles(session, hiveTableHandle).values());
        // verify the metadata
        ConnectorTableMetadata tableMetadata = metadata.getTableMetadata(session, getTableHandle(metadata, tableName));
        assertEquals(filterNonHiddenColumnMetadata(tableMetadata.getColumns()), columns);
        // verify the data
        List<ConnectorTableLayoutResult> tableLayoutResults = metadata.getTableLayouts(session, hiveTableHandle, Constraint.alwaysTrue(), Optional.empty());
        HiveTableLayoutHandle layoutHandle = (HiveTableLayoutHandle) getOnlyElement(tableLayoutResults).getTableLayout().getHandle();
        assertEquals(layoutHandle.getPartitions().get().size(), 1);
        ConnectorSplitSource splitSource = splitManager.getSplits(transaction.getTransactionHandle(), session, layoutHandle, SPLIT_SCHEDULING_CONTEXT);
        ConnectorSplit split = getOnlyElement(getAllSplits(splitSource));
        TableHandle tableHandle = new TableHandle(new ConnectorId("hive"), hiveTableHandle, transaction.getTransactionHandle(), Optional.of(layoutHandle));
        try (ConnectorPageSource pageSource = pageSourceProvider.createPageSource(transaction.getTransactionHandle(), session, split, tableHandle.getLayout().get(), columnHandles, NON_CACHEABLE)) {
            MaterializedResult result = materializeSourceDataStream(session, pageSource, getTypes(columnHandles));
            assertEqualsIgnoreOrder(result.getMaterializedRows(), data.getMaterializedRows());
        }
    }
}
Also used : ColumnHandle(com.facebook.presto.spi.ColumnHandle) ColumnMetadata(com.facebook.presto.spi.ColumnMetadata) AbstractTestHiveClient.filterNonHiddenColumnMetadata(com.facebook.presto.hive.AbstractTestHiveClient.filterNonHiddenColumnMetadata) ConnectorSplitSource(com.facebook.presto.spi.ConnectorSplitSource) ConnectorPageSource(com.facebook.presto.spi.ConnectorPageSource) ConnectorTableHandle(com.facebook.presto.spi.ConnectorTableHandle) ConnectorOutputTableHandle(com.facebook.presto.spi.ConnectorOutputTableHandle) HiveTransaction(com.facebook.presto.hive.AbstractTestHiveClient.HiveTransaction) Transaction(com.facebook.presto.hive.AbstractTestHiveClient.Transaction) ConnectorTableLayoutResult(com.facebook.presto.spi.ConnectorTableLayoutResult) Slice(io.airlift.slice.Slice) ConnectorSession(com.facebook.presto.spi.ConnectorSession) TestingConnectorSession(com.facebook.presto.testing.TestingConnectorSession) ConnectorOutputTableHandle(com.facebook.presto.spi.ConnectorOutputTableHandle) TableHandle(com.facebook.presto.spi.TableHandle) ConnectorTableHandle(com.facebook.presto.spi.ConnectorTableHandle) ConnectorMetadata(com.facebook.presto.spi.connector.ConnectorMetadata) MaterializedResult(com.facebook.presto.testing.MaterializedResult) ConnectorPageSink(com.facebook.presto.spi.ConnectorPageSink) ConnectorSplit(com.facebook.presto.spi.ConnectorSplit) ConnectorTableMetadata(com.facebook.presto.spi.ConnectorTableMetadata) ConnectorId(com.facebook.presto.spi.ConnectorId)

Example 83 with ColumnHandle

use of com.facebook.presto.spi.ColumnHandle in project presto by prestodb.

the class MetastoreHiveStatisticsProvider method getTableStatistics.

private static TableStatistics getTableStatistics(Map<String, ColumnHandle> columns, Map<String, Type> columnTypes, List<HivePartition> partitions, Map<String, PartitionStatistics> statistics) {
    if (statistics.isEmpty()) {
        return TableStatistics.empty();
    }
    checkArgument(!partitions.isEmpty(), "partitions is empty");
    OptionalDouble optionalAverageRowsPerPartition = calculateAverageRowsPerPartition(statistics.values());
    if (!optionalAverageRowsPerPartition.isPresent()) {
        return TableStatistics.empty();
    }
    double averageRowsPerPartition = optionalAverageRowsPerPartition.getAsDouble();
    verify(averageRowsPerPartition >= 0, "averageRowsPerPartition must be greater than or equal to zero");
    int queriedPartitionsCount = partitions.size();
    double rowCount = averageRowsPerPartition * queriedPartitionsCount;
    TableStatistics.Builder result = TableStatistics.builder();
    result.setRowCount(Estimate.of(rowCount));
    OptionalDouble optionalAverageSizePerPartition = calculateAverageSizePerPartition(statistics.values());
    if (optionalAverageSizePerPartition.isPresent()) {
        double averageSizePerPartition = optionalAverageSizePerPartition.getAsDouble();
        verify(averageSizePerPartition >= 0, "averageSizePerPartition must be greater than or equal to zero: %s", averageSizePerPartition);
        double totalSize = averageSizePerPartition * queriedPartitionsCount;
        result.setTotalSize(Estimate.of(totalSize));
    }
    for (Map.Entry<String, ColumnHandle> column : columns.entrySet()) {
        String columnName = column.getKey();
        HiveColumnHandle columnHandle = (HiveColumnHandle) column.getValue();
        Type columnType = columnTypes.get(columnName);
        ColumnStatistics columnStatistics;
        if (columnHandle.isPartitionKey()) {
            columnStatistics = createPartitionColumnStatistics(columnHandle, columnType, partitions, statistics, averageRowsPerPartition, rowCount);
        } else {
            columnStatistics = createDataColumnStatistics(columnName, columnType, rowCount, statistics.values());
        }
        result.setColumnStatistics(columnHandle, columnStatistics);
    }
    return result.build();
}
Also used : ColumnStatistics(com.facebook.presto.spi.statistics.ColumnStatistics) HiveColumnStatistics(com.facebook.presto.hive.metastore.HiveColumnStatistics) HiveColumnHandle(com.facebook.presto.hive.HiveColumnHandle) ColumnHandle(com.facebook.presto.spi.ColumnHandle) Varchars.isVarcharType(com.facebook.presto.common.type.Varchars.isVarcharType) DecimalType(com.facebook.presto.common.type.DecimalType) Chars.isCharType(com.facebook.presto.common.type.Chars.isCharType) Type(com.facebook.presto.common.type.Type) TableStatistics(com.facebook.presto.spi.statistics.TableStatistics) Map(java.util.Map) ImmutableMap(com.google.common.collect.ImmutableMap) OptionalDouble(java.util.OptionalDouble) HiveColumnHandle(com.facebook.presto.hive.HiveColumnHandle)

Example 84 with ColumnHandle

use of com.facebook.presto.spi.ColumnHandle in project presto by prestodb.

the class AbstractTestHiveClient method doTestBucketSortedTables.

private void doTestBucketSortedTables(SchemaTableName table, boolean useTempPath, HiveStorageFormat storageFormat) throws IOException {
    int bucketCount = 3;
    int expectedRowCount = 0;
    try (Transaction transaction = newTransaction()) {
        ConnectorMetadata metadata = transaction.getMetadata();
        ConnectorSession session = newSession(ImmutableMap.of(SORTED_WRITE_TO_TEMP_PATH_ENABLED, useTempPath));
        // begin creating the table
        ConnectorTableMetadata tableMetadata = new ConnectorTableMetadata(table, ImmutableList.<ColumnMetadata>builder().add(new ColumnMetadata("id", VARCHAR)).add(new ColumnMetadata("value_asc", VARCHAR)).add(new ColumnMetadata("value_desc", BIGINT)).add(new ColumnMetadata("ds", VARCHAR)).build(), ImmutableMap.<String, Object>builder().put(STORAGE_FORMAT_PROPERTY, storageFormat).put(PARTITIONED_BY_PROPERTY, ImmutableList.of("ds")).put(BUCKETED_BY_PROPERTY, ImmutableList.of("id")).put(BUCKET_COUNT_PROPERTY, bucketCount).put(SORTED_BY_PROPERTY, ImmutableList.builder().add(new SortingColumn("value_asc", SortingColumn.Order.ASCENDING)).add(new SortingColumn("value_desc", SortingColumn.Order.DESCENDING)).build()).build());
        HiveOutputTableHandle outputHandle = (HiveOutputTableHandle) metadata.beginCreateTable(session, tableMetadata, Optional.empty());
        // write the data
        ConnectorPageSink sink = pageSinkProvider.createPageSink(transaction.getTransactionHandle(), session, outputHandle, TEST_HIVE_PAGE_SINK_CONTEXT);
        List<Type> types = tableMetadata.getColumns().stream().map(ColumnMetadata::getType).collect(toList());
        ThreadLocalRandom random = ThreadLocalRandom.current();
        for (int i = 0; i < 50; i++) {
            MaterializedResult.Builder builder = MaterializedResult.resultBuilder(session, types);
            for (int j = 0; j < 1000; j++) {
                builder.row(sha256().hashLong(random.nextLong()).toString(), "test" + random.nextInt(100), random.nextLong(100_000), "2018-04-01");
                expectedRowCount++;
            }
            sink.appendPage(builder.build().toPage());
        }
        // verify we have enough temporary files per bucket to require multiple passes
        Path path = useTempPath ? getTempFilePathRoot(outputHandle).get() : getStagingPathRoot(outputHandle);
        HdfsContext context = new HdfsContext(session, table.getSchemaName(), table.getTableName(), outputHandle.getLocationHandle().getTargetPath().toString(), true);
        Set<String> files = listAllDataFiles(context, path);
        assertThat(listAllDataFiles(context, path)).filteredOn(file -> file.contains(".tmp-sort")).size().isGreaterThan(bucketCount * getHiveClientConfig().getMaxOpenSortFiles() * 2);
        // finish the write
        Collection<Slice> fragments = getFutureValue(sink.finish());
        // verify there are no temporary files
        for (String file : listAllDataFiles(context, path)) {
            assertThat(file).doesNotContain(".tmp-sort.");
        }
        // finish creating table
        metadata.finishCreateTable(session, outputHandle, fragments, ImmutableList.of());
        transaction.commit();
    }
    // verify that bucket files are sorted
    try (Transaction transaction = newTransaction()) {
        ConnectorMetadata metadata = transaction.getMetadata();
        ConnectorSession session = newSession(ImmutableMap.of(SORTED_WRITE_TO_TEMP_PATH_ENABLED, useTempPath));
        ConnectorTableHandle hiveTableHandle = getTableHandle(metadata, table);
        List<ColumnHandle> columnHandles = ImmutableList.copyOf(metadata.getColumnHandles(session, hiveTableHandle).values());
        ConnectorTableLayoutHandle layoutHandle = getLayout(session, transaction, hiveTableHandle, TupleDomain.all());
        List<ConnectorSplit> splits = getAllSplits(session, transaction, layoutHandle);
        assertThat(splits).hasSize(bucketCount);
        int actualRowCount = 0;
        for (ConnectorSplit split : splits) {
            try (ConnectorPageSource pageSource = pageSourceProvider.createPageSource(transaction.getTransactionHandle(), session, split, layoutHandle, columnHandles, NON_CACHEABLE)) {
                String lastValueAsc = null;
                long lastValueDesc = -1;
                while (!pageSource.isFinished()) {
                    Page page = pageSource.getNextPage();
                    if (page == null) {
                        continue;
                    }
                    for (int i = 0; i < page.getPositionCount(); i++) {
                        Block blockAsc = page.getBlock(1);
                        Block blockDesc = page.getBlock(2);
                        assertFalse(blockAsc.isNull(i));
                        assertFalse(blockDesc.isNull(i));
                        String valueAsc = VARCHAR.getSlice(blockAsc, i).toStringUtf8();
                        if (lastValueAsc != null) {
                            assertGreaterThanOrEqual(valueAsc, lastValueAsc);
                            if (valueAsc.equals(lastValueAsc)) {
                                long valueDesc = BIGINT.getLong(blockDesc, i);
                                if (lastValueDesc != -1) {
                                    assertLessThanOrEqual(valueDesc, lastValueDesc);
                                }
                                lastValueDesc = valueDesc;
                            } else {
                                lastValueDesc = -1;
                            }
                        }
                        lastValueAsc = valueAsc;
                        actualRowCount++;
                    }
                }
            }
        }
        assertThat(actualRowCount).isEqualTo(expectedRowCount);
    }
}
Also used : ColumnMetadata(com.facebook.presto.spi.ColumnMetadata) Page(com.facebook.presto.common.Page) ConnectorPageSource(com.facebook.presto.spi.ConnectorPageSource) ThreadLocalRandom(java.util.concurrent.ThreadLocalRandom) TestingConnectorSession(com.facebook.presto.testing.TestingConnectorSession) ConnectorSession(com.facebook.presto.spi.ConnectorSession) ConnectorMetadata(com.facebook.presto.spi.connector.ConnectorMetadata) ConnectorTableMetadata(com.facebook.presto.spi.ConnectorTableMetadata) Path(org.apache.hadoop.fs.Path) HiveColumnHandle.bucketColumnHandle(com.facebook.presto.hive.HiveColumnHandle.bucketColumnHandle) ColumnHandle(com.facebook.presto.spi.ColumnHandle) ConnectorTableLayoutHandle(com.facebook.presto.spi.ConnectorTableLayoutHandle) SortingColumn(com.facebook.presto.hive.metastore.SortingColumn) Constraint(com.facebook.presto.spi.Constraint) ConnectorTableHandle(com.facebook.presto.spi.ConnectorTableHandle) CharType.createCharType(com.facebook.presto.common.type.CharType.createCharType) VarcharType.createUnboundedVarcharType(com.facebook.presto.common.type.VarcharType.createUnboundedVarcharType) HiveTestUtils.arrayType(com.facebook.presto.hive.HiveTestUtils.arrayType) RowType(com.facebook.presto.common.type.RowType) MapType(com.facebook.presto.common.type.MapType) HiveTestUtils.mapType(com.facebook.presto.hive.HiveTestUtils.mapType) Type(com.facebook.presto.common.type.Type) DecimalType.createDecimalType(com.facebook.presto.common.type.DecimalType.createDecimalType) Varchars.isVarcharType(com.facebook.presto.common.type.Varchars.isVarcharType) Chars.isCharType(com.facebook.presto.common.type.Chars.isCharType) ArrayType(com.facebook.presto.common.type.ArrayType) HiveTestUtils.rowType(com.facebook.presto.hive.HiveTestUtils.rowType) HiveType.toHiveType(com.facebook.presto.hive.HiveType.toHiveType) PrestoTableType(com.facebook.presto.hive.metastore.PrestoTableType) VarcharType.createVarcharType(com.facebook.presto.common.type.VarcharType.createVarcharType) Slices.utf8Slice(io.airlift.slice.Slices.utf8Slice) Slice(io.airlift.slice.Slice) Block(com.facebook.presto.common.block.Block) ConnectorPageSink(com.facebook.presto.spi.ConnectorPageSink) MaterializedResult(com.facebook.presto.testing.MaterializedResult) ConnectorSplit(com.facebook.presto.spi.ConnectorSplit)

Example 85 with ColumnHandle

use of com.facebook.presto.spi.ColumnHandle in project presto by prestodb.

the class AbstractTestHiveClient method indexColumns.

protected static ImmutableMap<String, Integer> indexColumns(List<ColumnHandle> columnHandles) {
    ImmutableMap.Builder<String, Integer> index = ImmutableMap.builder();
    int i = 0;
    for (ColumnHandle columnHandle : columnHandles) {
        HiveColumnHandle hiveColumnHandle = (HiveColumnHandle) columnHandle;
        index.put(hiveColumnHandle.getName(), i);
        i++;
    }
    return index.build();
}
Also used : HiveColumnHandle.bucketColumnHandle(com.facebook.presto.hive.HiveColumnHandle.bucketColumnHandle) ColumnHandle(com.facebook.presto.spi.ColumnHandle) ImmutableMap.toImmutableMap(com.google.common.collect.ImmutableMap.toImmutableMap) ImmutableMap(com.google.common.collect.ImmutableMap) Constraint(com.facebook.presto.spi.Constraint)

Aggregations

ColumnHandle (com.facebook.presto.spi.ColumnHandle)243 ImmutableList (com.google.common.collect.ImmutableList)90 ImmutableMap (com.google.common.collect.ImmutableMap)81 Test (org.testng.annotations.Test)81 ConnectorSession (com.facebook.presto.spi.ConnectorSession)80 ConnectorTableHandle (com.facebook.presto.spi.ConnectorTableHandle)70 Constraint (com.facebook.presto.spi.Constraint)66 Map (java.util.Map)65 SchemaTableName (com.facebook.presto.spi.SchemaTableName)60 Optional (java.util.Optional)57 TupleDomain (com.facebook.presto.common.predicate.TupleDomain)54 List (java.util.List)54 Objects.requireNonNull (java.util.Objects.requireNonNull)53 ColumnMetadata (com.facebook.presto.spi.ColumnMetadata)47 ConnectorMetadata (com.facebook.presto.spi.connector.ConnectorMetadata)47 ConnectorTableLayoutHandle (com.facebook.presto.spi.ConnectorTableLayoutHandle)46 ImmutableSet (com.google.common.collect.ImmutableSet)46 TestingConnectorSession (com.facebook.presto.testing.TestingConnectorSession)43 ConnectorTableMetadata (com.facebook.presto.spi.ConnectorTableMetadata)42 PrestoException (com.facebook.presto.spi.PrestoException)42