Use of com.facebook.presto.hive.metastore.Storage in project presto by prestodb.
In class TestHivePageSourceProvider, method testNotUseRecordReaderWithInputFormatAnnotationWithoutCustomSplit.
@Test
public void testNotUseRecordReaderWithInputFormatAnnotationWithoutCustomSplit() {
    StorageFormat storageFormat = StorageFormat.create(ParquetHiveSerDe.class.getName(), HoodieParquetInputFormat.class.getName(), "");
    Storage storage = new Storage(storageFormat, "test", Optional.empty(), true, ImmutableMap.of(), ImmutableMap.of());
    HiveRecordCursorProvider recordCursorProvider = new MockHiveRecordCursorProvider();
    HiveBatchPageSourceFactory hiveBatchPageSourceFactory = new MockHiveBatchPageSourceFactory();
    Optional<ConnectorPageSource> pageSource = HivePageSourceProvider.createHivePageSource(
            ImmutableSet.of(recordCursorProvider),
            ImmutableSet.of(hiveBatchPageSourceFactory),
            new Configuration(),
            new TestingConnectorSession(new HiveSessionProperties(new HiveClientConfig().setUseRecordPageSourceForCustomSplit(true), new OrcFileWriterConfig(), new ParquetFileWriterConfig(), new CacheConfig()).getSessionProperties()),
            new Path("/test/"), OptionalInt.empty(), 0, 100, 200, Instant.now().toEpochMilli(),
            storage,
            TupleDomain.none(), ImmutableList.of(), ImmutableMap.of(), ImmutableList.of(),
            DateTimeZone.UTC, new TestingTypeManager(), new SchemaTableName("test", "test"),
            ImmutableList.of(), ImmutableList.of(), ImmutableMap.of(), 0,
            TableToPartitionMapping.empty(), Optional.empty(), false, null, null, false, null,
            Optional.empty(), ImmutableMap.of());
    assertTrue(pageSource.isPresent());
    assertTrue(pageSource.get() instanceof HivePageSource);
}
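For reference, the Storage constructor exercised above takes a StorageFormat, a location, an optional bucket property, a skewed flag, and serde/table parameter maps. A minimal standalone sketch, assuming the presto-hive module and Guava are on the classpath; the fully qualified SerDe and input-format names spell out the class references used above:

import com.facebook.presto.hive.metastore.Storage;
import com.facebook.presto.hive.metastore.StorageFormat;
import com.google.common.collect.ImmutableMap;
import java.util.Optional;

public class StorageExample {
    public static Storage hudiParquetStorage() {
        // serde, input format, and output format class names; the output format may be left empty here
        StorageFormat format = StorageFormat.create(
                "org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe",
                "org.apache.hudi.hadoop.HoodieParquetInputFormat",
                "");
        // location "test", no bucket property, skewed = true, empty serde and table parameter maps
        return new Storage(format, "test", Optional.empty(), true, ImmutableMap.of(), ImmutableMap.of());
    }
}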
Use of com.facebook.presto.hive.metastore.Storage in project presto by prestodb.
In class TestHivePageSourceProvider, method testGenerateCacheQuota.
@Test
public void testGenerateCacheQuota() {
    HiveClientConfig config = new HiveClientConfig();
    HiveSplit split = new HiveSplit(
            SCHEMA_NAME, TABLE_NAME, PARTITION_NAME, "file://test", 0, 10, 10, Instant.now().toEpochMilli(),
            new Storage(StorageFormat.create(config.getHiveStorageFormat().getSerDe(), config.getHiveStorageFormat().getInputFormat(), config.getHiveStorageFormat().getOutputFormat()), "location", Optional.empty(), false, ImmutableMap.of(), ImmutableMap.of()),
            ImmutableList.of(), ImmutableList.of(), OptionalInt.empty(), OptionalInt.empty(), NO_PREFERENCE,
            getColumnHandles().size(), TableToPartitionMapping.empty(), Optional.empty(), false, Optional.empty(),
            NO_CACHE_REQUIREMENT, Optional.empty(), ImmutableMap.of(), ImmutableSet.of(), SplitWeight.standard());
    CacheQuota cacheQuota = HivePageSourceProvider.generateCacheQuota(split);
    CacheQuota expectedCacheQuota = new CacheQuota(".", Optional.empty());
    assertEquals(cacheQuota, expectedCacheQuota);
    split = new HiveSplit(
            SCHEMA_NAME, TABLE_NAME, PARTITION_NAME, "file://test", 0, 10, 10, Instant.now().toEpochMilli(),
            new Storage(StorageFormat.create(config.getHiveStorageFormat().getSerDe(), config.getHiveStorageFormat().getInputFormat(), config.getHiveStorageFormat().getOutputFormat()), "location", Optional.empty(), false, ImmutableMap.of(), ImmutableMap.of()),
            ImmutableList.of(), ImmutableList.of(), OptionalInt.empty(), OptionalInt.empty(), NO_PREFERENCE,
            getColumnHandles().size(), TableToPartitionMapping.empty(), Optional.empty(), false, Optional.empty(),
            new CacheQuotaRequirement(PARTITION, Optional.of(DataSize.succinctDataSize(1, DataSize.Unit.MEGABYTE))),
            Optional.empty(), ImmutableMap.of(), ImmutableSet.of(), SplitWeight.standard());
    cacheQuota = HivePageSourceProvider.generateCacheQuota(split);
    expectedCacheQuota = new CacheQuota(SCHEMA_NAME + "." + TABLE_NAME + "." + PARTITION_NAME, Optional.of(DataSize.succinctDataSize(1, DataSize.Unit.MEGABYTE)));
    assertEquals(cacheQuota, expectedCacheQuota);
}
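The two assertions pin down generateCacheQuota's identity scheme: with no cache quota requirement the identity collapses to ".", while a partition-scoped requirement yields schema.table.partition. A hedged sketch of building the partition-scoped requirement used in the second split; the com.facebook.presto.hive package placement of CacheQuotaRequirement and CacheQuotaScope is an assumption inferred from the test's context, and DataSize is Airlift's:

import com.facebook.presto.hive.CacheQuotaRequirement;
import io.airlift.units.DataSize;
import java.util.Optional;
import static com.facebook.presto.hive.CacheQuotaScope.PARTITION;

public class CacheQuotaExample {
    public static CacheQuotaRequirement partitionScopedOneMegabyte() {
        // partition-scoped quota capped at 1 MB; generateCacheQuota then derives
        // the identity SCHEMA_NAME + "." + TABLE_NAME + "." + PARTITION_NAME
        return new CacheQuotaRequirement(PARTITION, Optional.of(DataSize.succinctDataSize(1, DataSize.Unit.MEGABYTE)));
    }
}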
Use of com.facebook.presto.hive.metastore.Storage in project presto by prestodb.
In class HiveMetadata, method createTemporaryTable.
@Override
public ConnectorTableHandle createTemporaryTable(ConnectorSession session, List<ColumnMetadata> columns, Optional<ConnectorPartitioningMetadata> partitioningMetadata) {
    String schemaName = getTemporaryTableSchema(session);
    HiveStorageFormat storageFormat = getTemporaryTableStorageFormat(session);
    Optional<HiveBucketProperty> bucketProperty = partitioningMetadata.map(partitioning -> {
        Set<String> allColumns = columns.stream().map(ColumnMetadata::getName).collect(toImmutableSet());
        if (!allColumns.containsAll(partitioning.getPartitionColumns())) {
            throw new PrestoException(INVALID_TABLE_PROPERTY, format("Bucketing columns %s not present in schema", Sets.difference(ImmutableSet.copyOf(partitioning.getPartitionColumns()), allColumns)));
        }
        HivePartitioningHandle partitioningHandle = (HivePartitioningHandle) partitioning.getPartitioningHandle();
        List<String> partitionColumns = partitioning.getPartitionColumns();
        BucketFunctionType bucketFunctionType = partitioningHandle.getBucketFunctionType();
        switch (bucketFunctionType) {
            case HIVE_COMPATIBLE:
                return new HiveBucketProperty(partitionColumns, partitioningHandle.getBucketCount(), ImmutableList.of(), HIVE_COMPATIBLE, Optional.empty());
            case PRESTO_NATIVE:
                Map<String, Type> columnNameToTypeMap = columns.stream().collect(toMap(ColumnMetadata::getName, ColumnMetadata::getType));
                return new HiveBucketProperty(partitionColumns, partitioningHandle.getBucketCount(), ImmutableList.of(), PRESTO_NATIVE, Optional.of(partitionColumns.stream().map(columnNameToTypeMap::get).collect(toImmutableList())));
            default:
                throw new IllegalArgumentException("Unsupported bucket function type " + bucketFunctionType);
        }
    });
    if (isUsePageFileForHiveUnsupportedType(session)) {
        if (!columns.stream().map(ColumnMetadata::getType).allMatch(HiveTypeTranslator::isSupportedHiveType)) {
            storageFormat = PAGEFILE;
        }
    }
    // PAGEFILE format doesn't require translation to hive type,
    // choose HIVE_BINARY as a default hive type to make it compatible with Hive connector
    Optional<HiveType> defaultHiveType = storageFormat == PAGEFILE ? Optional.of(HIVE_BINARY) : Optional.empty();
    List<HiveColumnHandle> columnHandles = getColumnHandles(
            // translate types Hive cannot represent to supported types that are binary compatible
            translateHiveUnsupportedTypesForTemporaryTable(columns, typeManager),
            ImmutableSet.of(),
            typeTranslator,
            defaultHiveType);
    validateColumns(storageFormat, columnHandles);
    HiveStorageFormat finalStorageFormat = storageFormat;
    String tableName = PRESTO_TEMPORARY_TABLE_NAME_PREFIX + finalStorageFormat.name() + "_" + session.getQueryId().replaceAll("-", "_") + "_" + randomUUID().toString().replaceAll("-", "_");
    Table table = Table.builder()
            .setDatabaseName(schemaName)
            .setTableName(tableName)
            .setOwner(session.getUser())
            .setTableType(TEMPORARY_TABLE)
            .setDataColumns(columnHandles.stream()
                    .map(handle -> new Column(handle.getName(), handle.getHiveType(), handle.getComment(), Optional.empty()))
                    .collect(toImmutableList()))
            .withStorage(storage -> storage
                    .setStorageFormat(fromHiveStorageFormat(finalStorageFormat))
                    .setBucketProperty(bucketProperty)
                    .setLocation(""))
            .build();
    List<String> partitionColumnNames = table.getPartitionColumns().stream().map(Column::getName).collect(toImmutableList());
    List<HiveColumnHandle> hiveColumnHandles = hiveColumnHandles(table);
    Map<String, Type> columnTypes = hiveColumnHandles.stream()
            .filter(columnHandle -> !columnHandle.isHidden())
            .collect(toImmutableMap(HiveColumnHandle::getName, column -> column.getHiveType().getType(typeManager)));
    Map<String, Set<ColumnStatisticType>> columnStatisticTypes = hiveColumnHandles.stream()
            .filter(columnHandle -> !partitionColumnNames.contains(columnHandle.getName()))
            .filter(column -> !column.isHidden())
            .collect(toImmutableMap(HiveColumnHandle::getName, column -> ImmutableSet.copyOf(getSupportedColumnStatisticsForTemporaryTable(typeManager.getType(column.getTypeSignature())))));
    metastore.createTable(session, table, buildInitialPrivilegeSet(table.getOwner()), Optional.empty(), false, createEmptyPartitionStatistics(columnTypes, columnStatisticTypes));
    return new HiveTableHandle(schemaName, tableName);
}
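The generated table name encodes the storage format, the query id, and a random UUID, with dashes rewritten to underscores so the whole string stays a valid Hive identifier. A standalone sketch of that scheme; the prefix value and query id below are assumptions standing in for PRESTO_TEMPORARY_TABLE_NAME_PREFIX and session.getQueryId():

import static java.util.UUID.randomUUID;

public class TempTableNameSketch {
    public static void main(String[] args) {
        String prefix = "__presto_temporary_table_"; // assumed value; the real constant lives in HiveMetadata
        String queryId = "20240101_000000_00000_abcde"; // hypothetical query id
        String tableName = prefix + "ORC" + "_"
                + queryId.replaceAll("-", "_") + "_"
                + randomUUID().toString().replaceAll("-", "_");
        System.out.println(tableName);
    }
}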
Use of com.facebook.presto.hive.metastore.Storage in project presto by prestodb.
In class RcFilePageSourceFactory, method createPageSource.
@Override
public Optional<? extends ConnectorPageSource> createPageSource(Configuration configuration, ConnectorSession session, Path path, long start, long length, long fileSize, Storage storage, SchemaTableName tableName, Map<String, String> tableParameters, List<HiveColumnHandle> columns, TupleDomain<HiveColumnHandle> effectivePredicate, DateTimeZone hiveStorageTimeZone, HiveFileContext hiveFileContext, Optional<EncryptionInformation> encryptionInformation) {
    if (!columns.isEmpty() && columns.stream().allMatch(hiveColumnHandle -> hiveColumnHandle.getColumnType() == AGGREGATED)) {
        throw new UnsupportedOperationException("Partial aggregation pushdown only supported for ORC/Parquet files. " +
                "Table " + tableName.toString() + " has file (" + path.toString() + ") of format " + storage.getStorageFormat().getOutputFormat() +
                ". Set session property hive.pushdown_partial_aggregations_into_scan=false and execute query again");
    }
    RcFileEncoding rcFileEncoding;
    if (LazyBinaryColumnarSerDe.class.getName().equals(storage.getStorageFormat().getSerDe())) {
        rcFileEncoding = new BinaryRcFileEncoding();
    } else if (ColumnarSerDe.class.getName().equals(storage.getStorageFormat().getSerDe())) {
        rcFileEncoding = createTextVectorEncoding(getHiveSchema(storage.getSerdeParameters(), tableParameters), hiveStorageTimeZone);
    } else {
        return Optional.empty();
    }
    if (fileSize == 0) {
        throw new PrestoException(HIVE_BAD_DATA, "RCFile is empty: " + path);
    }
    FSDataInputStream inputStream;
    try {
        inputStream = hdfsEnvironment.getFileSystem(session.getUser(), path, configuration).openFile(path, hiveFileContext);
    } catch (Exception e) {
        if (nullToEmpty(e.getMessage()).trim().equals("Filesystem closed") || e instanceof FileNotFoundException) {
            throw new PrestoException(HIVE_CANNOT_OPEN_SPLIT, e);
        }
        throw new PrestoException(HIVE_CANNOT_OPEN_SPLIT, splitError(e, path, start, length), e);
    }
    try {
        ImmutableMap.Builder<Integer, Type> readColumns = ImmutableMap.builder();
        for (HiveColumnHandle column : columns) {
            readColumns.put(column.getHiveColumnIndex(), column.getHiveType().getType(typeManager));
        }
        RcFileReader rcFileReader = new RcFileReader(
                new HdfsRcFileDataSource(path.toString(), inputStream, fileSize, stats),
                rcFileEncoding,
                readColumns.build(),
                new AircompressorCodecFactory(new HadoopCodecFactory(configuration.getClassLoader())),
                start,
                length,
                new DataSize(8, Unit.MEGABYTE));
        return Optional.of(new RcFilePageSource(rcFileReader, columns, typeManager));
    } catch (Throwable e) {
        try {
            inputStream.close();
        } catch (IOException ignored) {
        }
        if (e instanceof PrestoException) {
            throw (PrestoException) e;
        }
        String message = splitError(e, path, start, length);
        if (e instanceof RcFileCorruptionException) {
            throw new PrestoException(HIVE_BAD_DATA, message, e);
        }
        if (e.getClass().getSimpleName().equals("BlockMissingException")) {
            throw new PrestoException(HIVE_MISSING_DATA, message, e);
        }
        throw new PrestoException(HIVE_CANNOT_OPEN_SPLIT, message, e);
    }
}
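The factory opts in purely on the SerDe class name and returns Optional.empty() for anything else, so the split can fall through to another page source factory. A standalone illustration of that dispatch with plain strings in place of the Hive classes; encodingFor is a hypothetical helper, not Presto API:

import java.util.Optional;

public class SerDeDispatch {
    static Optional<String> encodingFor(String serde) {
        if ("org.apache.hadoop.hive.serde2.columnar.LazyBinaryColumnarSerDe".equals(serde)) {
            return Optional.of("binary"); // binary columnar encoding
        }
        if ("org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe".equals(serde)) {
            return Optional.of("text"); // text columnar encoding
        }
        return Optional.empty(); // unknown SerDe: decline the split
    }

    public static void main(String[] args) {
        System.out.println(encodingFor("org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe")); // Optional[text]
    }
}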
Use of com.facebook.presto.hive.metastore.Storage in project presto by prestodb.
In class TestHivePageSink, method createPageSource.
private static ConnectorPageSource createPageSource(HiveTransactionHandle transaction, HiveClientConfig config, MetastoreClientConfig metastoreClientConfig, File outputFile) {
    HiveSplit split = new HiveSplit(
            SCHEMA_NAME, TABLE_NAME, "", "file:///" + outputFile.getAbsolutePath(), 0, outputFile.length(), outputFile.length(), outputFile.lastModified(),
            new Storage(StorageFormat.create(config.getHiveStorageFormat().getSerDe(), config.getHiveStorageFormat().getInputFormat(), config.getHiveStorageFormat().getOutputFormat()), "location", Optional.empty(), false, ImmutableMap.of(), ImmutableMap.of()),
            ImmutableList.of(), ImmutableList.of(), OptionalInt.empty(), OptionalInt.empty(), NO_PREFERENCE,
            getColumnHandles().size(), TableToPartitionMapping.empty(), Optional.empty(), false, Optional.empty(),
            NO_CACHE_REQUIREMENT, Optional.empty(), ImmutableMap.of(), ImmutableSet.of(), SplitWeight.standard());
    TableHandle tableHandle = new TableHandle(
            new ConnectorId(HIVE_CATALOG),
            new HiveTableHandle(SCHEMA_NAME, TABLE_NAME),
            transaction,
            Optional.of(new HiveTableLayoutHandle(
                    new SchemaTableName(SCHEMA_NAME, TABLE_NAME), "path", ImmutableList.of(),
                    getColumnHandles().stream().map(column -> new Column(column.getName(), column.getHiveType(), Optional.empty(), Optional.empty())).collect(toImmutableList()),
                    ImmutableMap.of(), TupleDomain.all(), TRUE_CONSTANT, ImmutableMap.of(), TupleDomain.all(),
                    Optional.empty(), Optional.empty(), false, "layout", Optional.empty(), false)));
    HivePageSourceProvider provider = new HivePageSourceProvider(
            config,
            createTestHdfsEnvironment(config, metastoreClientConfig),
            getDefaultHiveRecordCursorProvider(config, metastoreClientConfig),
            getDefaultHiveBatchPageSourceFactories(config, metastoreClientConfig),
            getDefaultHiveSelectivePageSourceFactories(config, metastoreClientConfig),
            FUNCTION_AND_TYPE_MANAGER,
            ROW_EXPRESSION_SERVICE);
    return provider.createPageSource(transaction, getSession(config), split, tableHandle.getLayout().get(), ImmutableList.copyOf(getColumnHandles()), NON_CACHEABLE);
}
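The split addresses the freshly written local file directly, reading the whole file as a single split. A minimal sketch of how the split coordinates derive from the file, with a hypothetical path, mirroring the "file:///" + getAbsolutePath() construction above:

import java.io.File;

public class SplitCoordinates {
    public static void main(String[] args) {
        File outputFile = new File("/tmp/page_sink_test.orc"); // hypothetical staging file
        String path = "file:///" + outputFile.getAbsolutePath();
        long start = 0; // the whole file is one split
        long length = outputFile.length();
        long fileSize = outputFile.length();
        long fileModifiedTime = outputFile.lastModified();
        System.out.printf("%s [%d, %d) size=%d mtime=%d%n", path, start, start + length, fileSize, fileModifiedTime);
    }
}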