Use of com.facebook.presto.hive.metastore.SortingColumn in project presto by prestodb.
In class AbstractTestHiveClient, method doTestBucketSortedTables:
private void doTestBucketSortedTables(SchemaTableName table, boolean useTempPath, HiveStorageFormat storageFormat)
        throws IOException
{
    int bucketCount = 3;
    int expectedRowCount = 0;
    try (Transaction transaction = newTransaction()) {
        ConnectorMetadata metadata = transaction.getMetadata();
        ConnectorSession session = newSession(ImmutableMap.of(SORTED_WRITE_TO_TEMP_PATH_ENABLED, useTempPath));
        // begin creating the table
        ConnectorTableMetadata tableMetadata = new ConnectorTableMetadata(
                table,
                ImmutableList.<ColumnMetadata>builder()
                        .add(new ColumnMetadata("id", VARCHAR))
                        .add(new ColumnMetadata("value_asc", VARCHAR))
                        .add(new ColumnMetadata("value_desc", BIGINT))
                        .add(new ColumnMetadata("ds", VARCHAR))
                        .build(),
                ImmutableMap.<String, Object>builder()
                        .put(STORAGE_FORMAT_PROPERTY, storageFormat)
                        .put(PARTITIONED_BY_PROPERTY, ImmutableList.of("ds"))
                        .put(BUCKETED_BY_PROPERTY, ImmutableList.of("id"))
                        .put(BUCKET_COUNT_PROPERTY, bucketCount)
                        .put(SORTED_BY_PROPERTY, ImmutableList.builder()
                                .add(new SortingColumn("value_asc", SortingColumn.Order.ASCENDING))
                                .add(new SortingColumn("value_desc", SortingColumn.Order.DESCENDING))
                                .build())
                        .build());
        HiveOutputTableHandle outputHandle = (HiveOutputTableHandle) metadata.beginCreateTable(session, tableMetadata, Optional.empty());
        // write the data
        ConnectorPageSink sink = pageSinkProvider.createPageSink(transaction.getTransactionHandle(), session, outputHandle, TEST_HIVE_PAGE_SINK_CONTEXT);
        List<Type> types = tableMetadata.getColumns().stream()
                .map(ColumnMetadata::getType)
                .collect(toList());
        ThreadLocalRandom random = ThreadLocalRandom.current();
        for (int i = 0; i < 50; i++) {
            MaterializedResult.Builder builder = MaterializedResult.resultBuilder(session, types);
            for (int j = 0; j < 1000; j++) {
                builder.row(sha256().hashLong(random.nextLong()).toString(), "test" + random.nextInt(100), random.nextLong(100_000), "2018-04-01");
                expectedRowCount++;
            }
            sink.appendPage(builder.build().toPage());
        }
        // verify we have enough temporary files per bucket to require multiple passes
        Path path = useTempPath ? getTempFilePathRoot(outputHandle).get() : getStagingPathRoot(outputHandle);
        HdfsContext context = new HdfsContext(session, table.getSchemaName(), table.getTableName(), outputHandle.getLocationHandle().getTargetPath().toString(), true);
        Set<String> files = listAllDataFiles(context, path);
        assertThat(files)
                .filteredOn(file -> file.contains(".tmp-sort"))
                .size().isGreaterThan(bucketCount * getHiveClientConfig().getMaxOpenSortFiles() * 2);
        // finish the write
        Collection<Slice> fragments = getFutureValue(sink.finish());
        // verify there are no temporary files
        for (String file : listAllDataFiles(context, path)) {
            assertThat(file).doesNotContain(".tmp-sort.");
        }
        // finish creating table
        metadata.finishCreateTable(session, outputHandle, fragments, ImmutableList.of());
        transaction.commit();
    }
    // verify that bucket files are sorted
    try (Transaction transaction = newTransaction()) {
        ConnectorMetadata metadata = transaction.getMetadata();
        ConnectorSession session = newSession(ImmutableMap.of(SORTED_WRITE_TO_TEMP_PATH_ENABLED, useTempPath));
        ConnectorTableHandle hiveTableHandle = getTableHandle(metadata, table);
        List<ColumnHandle> columnHandles = ImmutableList.copyOf(metadata.getColumnHandles(session, hiveTableHandle).values());
        ConnectorTableLayoutHandle layoutHandle = getLayout(session, transaction, hiveTableHandle, TupleDomain.all());
        List<ConnectorSplit> splits = getAllSplits(session, transaction, layoutHandle);
        assertThat(splits).hasSize(bucketCount);
        int actualRowCount = 0;
        for (ConnectorSplit split : splits) {
            try (ConnectorPageSource pageSource = pageSourceProvider.createPageSource(transaction.getTransactionHandle(), session, split, layoutHandle, columnHandles, NON_CACHEABLE)) {
                String lastValueAsc = null;
                long lastValueDesc = -1;
                while (!pageSource.isFinished()) {
                    Page page = pageSource.getNextPage();
                    if (page == null) {
                        continue;
                    }
                    for (int i = 0; i < page.getPositionCount(); i++) {
                        Block blockAsc = page.getBlock(1);
                        Block blockDesc = page.getBlock(2);
                        assertFalse(blockAsc.isNull(i));
                        assertFalse(blockDesc.isNull(i));
                        String valueAsc = VARCHAR.getSlice(blockAsc, i).toStringUtf8();
                        if (lastValueAsc != null) {
                            assertGreaterThanOrEqual(valueAsc, lastValueAsc);
                            if (valueAsc.equals(lastValueAsc)) {
                                long valueDesc = BIGINT.getLong(blockDesc, i);
                                if (lastValueDesc != -1) {
                                    assertLessThanOrEqual(valueDesc, lastValueDesc);
                                }
                                lastValueDesc = valueDesc;
                            }
                            else {
                                lastValueDesc = -1;
                            }
                        }
                        lastValueAsc = valueAsc;
                        actualRowCount++;
                    }
                }
            }
        }
        assertThat(actualRowCount).isEqualTo(expectedRowCount);
    }
}
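The two-level check in the read-back loop (ascending on value_asc, then descending on value_desc among equal value_asc entries) is equivalent to verifying adjacent rows against a composite comparator. Below is a minimal standalone sketch of that invariant; the Row record and the SortOrderCheck/isSorted names are hypothetical, not part of the Presto test suite:

import java.util.Comparator;
import java.util.List;

final class SortOrderCheck
{
    // Hypothetical holder for the (value_asc, value_desc) pair read from each page.
    record Row(String valueAsc, long valueDesc) {}

    // Mirrors the SORTED_BY_PROPERTY declared above: value_asc ASCENDING, value_desc DESCENDING.
    static final Comparator<Row> EXPECTED_ORDER = Comparator.comparing(Row::valueAsc)
            .thenComparing(Comparator.comparingLong(Row::valueDesc).reversed());

    // Returns true if every adjacent pair within a bucket respects the declared sort order.
    static boolean isSorted(List<Row> bucketRows)
    {
        for (int i = 1; i < bucketRows.size(); i++) {
            if (EXPECTED_ORDER.compare(bucketRows.get(i - 1), bucketRows.get(i)) > 0) {
                return false;
            }
        }
        return true;
    }
}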
Use of com.facebook.presto.hive.metastore.SortingColumn in project presto by prestodb.
In class HiveMetadata, method prepareTable:
private Table prepareTable(ConnectorSession session, ConnectorTableMetadata tableMetadata, PrestoTableType tableType)
{
    SchemaTableName schemaTableName = tableMetadata.getTable();
    String schemaName = schemaTableName.getSchemaName();
    String tableName = schemaTableName.getTableName();
    List<String> partitionedBy = getPartitionedBy(tableMetadata.getProperties());
    Optional<HiveBucketProperty> bucketProperty = getBucketProperty(tableMetadata.getProperties());
    if ((bucketProperty.isPresent() || !partitionedBy.isEmpty()) && getAvroSchemaUrl(tableMetadata.getProperties()) != null) {
        throw new PrestoException(NOT_SUPPORTED, "Bucketing/Partitioning columns not supported when Avro schema url is set");
    }
    List<HiveColumnHandle> columnHandles = getColumnHandles(tableMetadata, ImmutableSet.copyOf(partitionedBy), typeTranslator);
    HiveStorageFormat hiveStorageFormat = getHiveStorageFormat(tableMetadata.getProperties());
    List<SortingColumn> preferredOrderingColumns = getPreferredOrderingColumns(tableMetadata.getProperties());
    Optional<TableEncryptionProperties> tableEncryptionProperties = getTableEncryptionPropertiesFromTableProperties(tableMetadata, hiveStorageFormat, partitionedBy);
    if (tableEncryptionProperties.isPresent() && partitionedBy.isEmpty()) {
        throw new PrestoException(HIVE_UNSUPPORTED_ENCRYPTION_OPERATION, "Creating an encrypted table without partitions is not supported. Use CREATE TABLE AS SELECT to create an encrypted table without partitions");
    }
    validateColumns(hiveStorageFormat, columnHandles);
    MetastoreContext metastoreContext = getMetastoreContext(session);
    Map<String, HiveColumnHandle> columnHandlesByName = Maps.uniqueIndex(columnHandles, HiveColumnHandle::getName);
    List<Column> partitionColumns = partitionedBy.stream()
            .map(columnHandlesByName::get)
            .map(columnHandle -> columnHandleToColumn(metastoreContext, columnHandle))
            .collect(toList());
    checkPartitionTypesSupported(partitionColumns);
    Path targetPath;
    if (tableType.equals(EXTERNAL_TABLE)) {
        if (!createsOfNonManagedTablesEnabled) {
            throw new PrestoException(NOT_SUPPORTED, "Cannot create non-managed Hive table");
        }
        String externalLocation = getExternalLocation(tableMetadata.getProperties());
        targetPath = getExternalPath(new HdfsContext(session, schemaName, tableName, externalLocation, true), externalLocation);
    }
    else if (tableType.equals(MANAGED_TABLE) || tableType.equals(MATERIALIZED_VIEW)) {
        LocationHandle locationHandle = locationService.forNewTable(metastore, session, schemaName, tableName, isTempPathRequired(session, bucketProperty, preferredOrderingColumns));
        targetPath = locationService.getQueryWriteInfo(locationHandle).getTargetPath();
    }
    else {
        throw new IllegalStateException(format("%s is not a valid table type to be created.", tableType));
    }
    Map<String, String> tableProperties = getEmptyTableProperties(tableMetadata, new HdfsContext(session, schemaName, tableName, targetPath.toString(), true), hiveStorageFormat, tableEncryptionProperties);
    return buildTableObject(session.getQueryId(), schemaName, tableName, session.getUser(), columnHandles, hiveStorageFormat, partitionedBy, bucketProperty, preferredOrderingColumns, tableProperties, targetPath, tableType, prestoVersion, metastoreContext);
}
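Note that prepareTable only asks locationService for a temporary write path when the new table will be write-sorted. A hedged sketch of what the isTempPathRequired check referenced above plausibly does, assuming the SORTED_WRITE_TO_TEMP_PATH_ENABLED session property seen in the test earlier; the actual Presto implementation may differ:

// Hedged sketch, not the verified Presto implementation: a temp path is only
// needed when the session enables sorted writes to a temp path and the table
// actually has sort columns (from the bucket property or preferred ordering).
private static boolean isTempPathRequired(ConnectorSession session, Optional<HiveBucketProperty> bucketProperty, List<SortingColumn> preferredOrderingColumns)
{
    boolean hasSortedWrite = bucketProperty.map(property -> !property.getSortedBy().isEmpty()).orElse(false)
            || !preferredOrderingColumns.isEmpty();
    return isSortedWriteToTempPathEnabled(session) && hasSortedWrite;
}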
Use of com.facebook.presto.hive.metastore.SortingColumn in project presto by prestodb.
In class HivePageSinkProvider, method createPageSink:
private ConnectorPageSink createPageSink(HiveWritableTableHandle handle, boolean isCreateTable, ConnectorSession session, HiveMetadataUpdater hiveMetadataUpdater, boolean commitRequired, Map<String, String> additionalTableParameters)
{
    OptionalInt bucketCount = OptionalInt.empty();
    List<SortingColumn> sortedBy;
    if (handle.getBucketProperty().isPresent()) {
        bucketCount = OptionalInt.of(handle.getBucketProperty().get().getBucketCount());
        sortedBy = handle.getBucketProperty().get().getSortedBy();
    }
    else {
        sortedBy = handle.getPreferredOrderingColumns();
    }
    HiveWriterFactory writerFactory = new HiveWriterFactory(
            fileWriterFactories, handle.getSchemaName(), handle.getTableName(), isCreateTable,
            handle.getInputColumns(), handle.getTableStorageFormat(), handle.getPartitionStorageFormat(),
            handle.getCompressionCodec(), additionalTableParameters, bucketCount, sortedBy,
            handle.getLocationHandle(), locationService, session.getQueryId(),
            // TODO: Extend metastore cache scope to the entire transaction
            new HivePageSinkMetadataProvider(
                    handle.getPageSinkMetadata(),
                    memoizeMetastore(metastore, metastoreImpersonationEnabled, perTransactionMetastoreCacheMaximumSize),
                    new MetastoreContext(
                            session.getIdentity(), session.getQueryId(), session.getClientInfo(), session.getSource(),
                            getMetastoreHeaders(session), isUserDefinedTypeEncodingEnabled(session), columnConverterProvider)),
            typeManager, hdfsEnvironment, pageSorter, writerSortBufferSize, maxOpenSortFiles,
            immutablePartitions, session, nodeManager, eventClient, hiveSessionProperties,
            hiveWriterStats, orcFileWriterFactory, commitRequired, handle.getEncryptionInformation());
    return new HivePageSink(
            writerFactory, handle.getInputColumns(), handle.getBucketProperty(),
            handle.getSchemaName(), handle.getTableName(), pageIndexerFactory, typeManager,
            hdfsEnvironment, maxOpenPartitions, writeVerificationExecutor,
            partitionUpdateCodec, partitionUpdateSmileCodec, session, hiveMetadataUpdater);
}
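The branch at the top of the method decides where the writer's sort specification comes from: a bucketed table sorts within buckets using its bucket property's sortedBy list, while an un-bucketed table falls back to preferred_ordering_columns. The same selection can be condensed into a small helper; the method name is hypothetical and this is a sketch, not existing Presto code:

// Equivalent, more compact form of the sortedBy selection above (sketch only).
private static List<SortingColumn> resolveSortColumns(HiveWritableTableHandle handle)
{
    return handle.getBucketProperty()
            .map(HiveBucketProperty::getSortedBy)
            .orElseGet(handle::getPreferredOrderingColumns);
}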
Use of com.facebook.presto.hive.metastore.SortingColumn in project presto by prestodb.
In class HiveMetadata, method buildTableObject:
private static Table buildTableObject(
        String queryId, String schemaName, String tableName, String tableOwner,
        List<HiveColumnHandle> columnHandles, HiveStorageFormat hiveStorageFormat,
        List<String> partitionedBy, Optional<HiveBucketProperty> bucketProperty,
        List<SortingColumn> preferredOrderingColumns, Map<String, String> additionalTableParameters,
        Path targetPath, PrestoTableType tableType, String prestoVersion, MetastoreContext metastoreContext)
{
    Map<String, HiveColumnHandle> columnHandlesByName = Maps.uniqueIndex(columnHandles, HiveColumnHandle::getName);
    List<Column> partitionColumns = partitionedBy.stream()
            .map(columnHandlesByName::get)
            .map(column -> columnHandleToColumn(metastoreContext, column))
            .collect(toList());
    Set<String> partitionColumnNames = ImmutableSet.copyOf(partitionedBy);
    ImmutableList.Builder<Column> columns = ImmutableList.builder();
    for (HiveColumnHandle columnHandle : columnHandles) {
        String name = columnHandle.getName();
        if (!partitionColumnNames.contains(name)) {
            verify(!columnHandle.isPartitionKey(), "Column handles are not consistent with partitioned by property");
            columns.add(columnHandleToColumn(metastoreContext, columnHandle));
        }
        else {
            verify(columnHandle.isPartitionKey(), "Column handles are not consistent with partitioned by property");
        }
    }
    ImmutableMap.Builder<String, String> tableParameters = ImmutableMap.<String, String>builder()
            .put(PRESTO_VERSION_NAME, prestoVersion)
            .put(PRESTO_QUERY_ID_NAME, queryId)
            .putAll(additionalTableParameters);
    if (tableType.equals(EXTERNAL_TABLE)) {
        tableParameters.put("EXTERNAL", "TRUE");
    }
    Table.Builder tableBuilder = Table.builder()
            .setDatabaseName(schemaName)
            .setTableName(tableName)
            .setOwner(tableOwner)
            .setTableType(tableType)
            .setDataColumns(columns.build())
            .setPartitionColumns(partitionColumns)
            .setParameters(tableParameters.build());
    tableBuilder.getStorageBuilder()
            .setStorageFormat(fromHiveStorageFormat(hiveStorageFormat))
            .setBucketProperty(bucketProperty)
            .setParameters(ImmutableMap.of(PREFERRED_ORDERING_COLUMNS, encodePreferredOrderingColumns(preferredOrderingColumns)))
            .setLocation(targetPath.toString());
    return tableBuilder.build();
}
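Unlike sorted_by on a bucketed table, which the metastore stores natively in the bucket property, preferred ordering columns survive only as a string parameter (PREFERRED_ORDERING_COLUMNS) on the storage descriptor. A plausible sketch of such an encoding, rendering each column as "name" or "name DESC", comma-separated; the real encodePreferredOrderingColumns may differ in detail:

// Hedged sketch of a comma-separated encoding for sorting columns; not
// necessarily byte-for-byte what Presto writes to the metastore.
static String encodeSortingColumns(List<SortingColumn> columns)
{
    return columns.stream()
            .map(column -> column.getColumnName()
                    + (column.getOrder() == SortingColumn.Order.DESCENDING ? " DESC" : ""))
            .collect(java.util.stream.Collectors.joining(","));
}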
Use of com.facebook.presto.hive.metastore.SortingColumn in project presto by prestodb.
In class HiveMetadata, method getTableMetadata:
private ConnectorTableMetadata getTableMetadata(ConnectorSession session, SchemaTableName tableName)
{
    MetastoreContext metastoreContext = getMetastoreContext(session);
    Optional<Table> table = metastore.getTable(metastoreContext, tableName.getSchemaName(), tableName.getTableName());
    if (!table.isPresent() || table.get().getTableType().equals(VIRTUAL_VIEW)) {
        throw new TableNotFoundException(tableName);
    }
    Function<HiveColumnHandle, ColumnMetadata> metadataGetter = columnMetadataGetter(table.get(), typeManager, metastoreContext.getColumnConverter());
    ImmutableList.Builder<ColumnMetadata> columns = ImmutableList.builder();
    for (HiveColumnHandle columnHandle : hiveColumnHandles(table.get())) {
        columns.add(metadataGetter.apply(columnHandle));
    }
    // External location property
    ImmutableMap.Builder<String, Object> properties = ImmutableMap.builder();
    if (table.get().getTableType().equals(EXTERNAL_TABLE)) {
        properties.put(EXTERNAL_LOCATION_PROPERTY, table.get().getStorage().getLocation());
    }
    // Storage format property
    HiveStorageFormat format = null;
    try {
        format = extractHiveStorageFormat(table.get());
        properties.put(STORAGE_FORMAT_PROPERTY, format);
    }
    catch (PrestoException ignored) {
        // todo fail if format is not known
    }
    getTableEncryptionPropertiesFromHiveProperties(table.get().getParameters(), format)
            .map(TableEncryptionProperties::toTableProperties)
            .ifPresent(properties::putAll);
    // Partitioning property
    List<String> partitionedBy = table.get().getPartitionColumns().stream()
            .map(Column::getName)
            .collect(toList());
    if (!partitionedBy.isEmpty()) {
        properties.put(PARTITIONED_BY_PROPERTY, partitionedBy);
    }
    // Bucket properties
    Optional<HiveBucketProperty> bucketProperty = table.get().getStorage().getBucketProperty();
    bucketProperty.ifPresent(property -> {
        properties.put(BUCKET_COUNT_PROPERTY, property.getBucketCount());
        properties.put(BUCKETED_BY_PROPERTY, property.getBucketedBy());
        properties.put(SORTED_BY_PROPERTY, property.getSortedBy());
    });
    // Preferred ordering columns
    List<SortingColumn> preferredOrderingColumns = decodePreferredOrderingColumnsFromStorage(table.get().getStorage());
    if (!preferredOrderingColumns.isEmpty()) {
        if (bucketProperty.isPresent()) {
            throw new PrestoException(HIVE_INVALID_METADATA, format("bucketed table %s should not specify preferred_ordering_columns", tableName));
        }
        properties.put(PREFERRED_ORDERING_COLUMNS, preferredOrderingColumns);
    }
    // ORC format specific properties
    String orcBloomFilterColumns = table.get().getParameters().get(ORC_BLOOM_FILTER_COLUMNS_KEY);
    if (orcBloomFilterColumns != null) {
        properties.put(ORC_BLOOM_FILTER_COLUMNS, Splitter.on(COMMA).trimResults().omitEmptyStrings().splitToList(orcBloomFilterColumns));
    }
    String orcBloomFilterFpp = table.get().getParameters().get(ORC_BLOOM_FILTER_FPP_KEY);
    if (orcBloomFilterFpp != null) {
        properties.put(ORC_BLOOM_FILTER_FPP, Double.parseDouble(orcBloomFilterFpp));
    }
    // Avro specific property
    String avroSchemaUrl = table.get().getParameters().get(AVRO_SCHEMA_URL_KEY);
    if (avroSchemaUrl != null) {
        properties.put(AVRO_SCHEMA_URL, avroSchemaUrl);
    }
    // CSV specific properties
    getCsvSerdeProperty(table.get(), CSV_SEPARATOR_KEY).ifPresent(csvSeparator -> properties.put(CSV_SEPARATOR, csvSeparator));
    getCsvSerdeProperty(table.get(), CSV_QUOTE_KEY).ifPresent(csvQuote -> properties.put(CSV_QUOTE, csvQuote));
    getCsvSerdeProperty(table.get(), CSV_ESCAPE_KEY).ifPresent(csvEscape -> properties.put(CSV_ESCAPE, csvEscape));
    // Hook point for extended versions of the Hive Plugin
    properties.putAll(tableParameterCodec.decode(table.get().getParameters()));
    Optional<String> comment = Optional.ofNullable(table.get().getParameters().get(TABLE_COMMENT));
    return new ConnectorTableMetadata(tableName, columns.build(), properties.build(), comment);
}
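Putting the round trip together: a caller receiving this ConnectorTableMetadata can recover the sorting metadata from the property map. A brief hedged usage sketch; the cast assumes SORTED_BY_PROPERTY was populated from property.getSortedBy(), as in the code above:

ConnectorTableMetadata tableMetadata = getTableMetadata(session, tableName);
Map<String, Object> properties = tableMetadata.getProperties();

// Bucketed tables expose sorted_by; un-bucketed write-sorted tables expose
// preferred_ordering_columns instead (mutually exclusive, as enforced above).
@SuppressWarnings("unchecked")
List<SortingColumn> sortedBy =
        (List<SortingColumn>) properties.getOrDefault(SORTED_BY_PROPERTY, ImmutableList.of());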