Use of com.facebook.presto.spi.ConnectorSession in project presto by prestodb.
From the class HivePageSourceProvider, method createSelectivePageSource:
private static Optional<ConnectorPageSource> createSelectivePageSource(
        Set<HiveSelectivePageSourceFactory> selectivePageSourceFactories,
        Configuration configuration,
        ConnectorSession session,
        HiveSplit split,
        HiveTableLayoutHandle layout,
        List<HiveColumnHandle> columns,
        DateTimeZone hiveStorageTimeZone,
        TypeManager typeManager,
        LoadingCache<RowExpressionCacheKey, RowExpression> rowExpressionCache,
        SplitContext splitContext,
        Optional<EncryptionInformation> encryptionInformation)
{
    // Columns needed to evaluate the predicate or perform bucket conversion, beyond the requested output columns
    Set<HiveColumnHandle> interimColumns = ImmutableSet.<HiveColumnHandle>builder()
            .addAll(layout.getPredicateColumns().values())
            .addAll(split.getBucketConversion().map(BucketConversion::getBucketColumnHandles).orElse(ImmutableList.of()))
            .build();
    Set<String> columnNames = columns.stream()
            .map(HiveColumnHandle::getName)
            .collect(toImmutableSet());
    List<HiveColumnHandle> allColumns = ImmutableList.<HiveColumnHandle>builder()
            .addAll(columns)
            .addAll(interimColumns.stream()
                    .filter(column -> !columnNames.contains(column.getName()))
                    .collect(toImmutableList()))
            .build();

    Path path = new Path(split.getPath());
    List<ColumnMapping> columnMappings = ColumnMapping.buildColumnMappings(
            split.getPartitionKeys(),
            allColumns,
            ImmutableList.of(),
            split.getTableToPartitionMapping(),
            path,
            split.getTableBucketNumber(),
            split.getFileSize(),
            split.getFileModifiedTime());
    Optional<BucketAdaptation> bucketAdaptation = split.getBucketConversion()
            .map(conversion -> toBucketAdaptation(conversion, columnMappings, split.getTableBucketNumber(), mapping -> mapping.getHiveColumnHandle().getHiveColumnIndex()));
    Map<Integer, String> prefilledValues = columnMappings.stream()
            .filter(mapping -> mapping.getKind() == ColumnMappingKind.PREFILLED)
            .collect(toImmutableMap(mapping -> mapping.getHiveColumnHandle().getHiveColumnIndex(), ColumnMapping::getPrefilledValue));
    Map<Integer, HiveCoercer> coercers = columnMappings.stream()
            .filter(mapping -> mapping.getCoercionFrom().isPresent())
            .collect(toImmutableMap(
                    mapping -> mapping.getHiveColumnHandle().getHiveColumnIndex(),
                    mapping -> createCoercer(typeManager, mapping.getCoercionFrom().get(), mapping.getHiveColumnHandle().getHiveType())));
    List<Integer> outputColumns = columns.stream()
            .map(HiveColumnHandle::getHiveColumnIndex)
            .collect(toImmutableList());
    RowExpression optimizedRemainingPredicate = rowExpressionCache.getUnchecked(new RowExpressionCacheKey(layout.getRemainingPredicate(), session));

    // Skip the split entirely when bucket or partition pruning applies
    if (shouldSkipBucket(layout, split, splitContext)) {
        return Optional.of(new HiveEmptySplitPageSource());
    }
    if (shouldSkipPartition(typeManager, layout, hiveStorageTimeZone, split, splitContext)) {
        return Optional.of(new HiveEmptySplitPageSource());
    }

    CacheQuota cacheQuota = generateCacheQuota(split);
    // Try each factory in turn; the first one that produces a page source wins
    for (HiveSelectivePageSourceFactory pageSourceFactory : selectivePageSourceFactories) {
        Optional<? extends ConnectorPageSource> pageSource = pageSourceFactory.createPageSource(
                configuration,
                session,
                path,
                split.getStart(),
                split.getLength(),
                split.getFileSize(),
                split.getStorage(),
                toColumnHandles(columnMappings, true),
                prefilledValues,
                coercers,
                bucketAdaptation,
                outputColumns,
                splitContext.getDynamicFilterPredicate()
                        .map(filter -> filter.transform(handle -> new Subfield(((HiveColumnHandle) handle).getName())).intersect(layout.getDomainPredicate()))
                        .orElse(layout.getDomainPredicate()),
                optimizedRemainingPredicate,
                hiveStorageTimeZone,
                new HiveFileContext(
                        splitContext.isCacheable(),
                        cacheQuota,
                        split.getExtraFileInfo().map(BinaryExtraHiveFileInfo::new),
                        Optional.of(split.getFileSize()),
                        split.getFileModifiedTime(),
                        HiveSessionProperties.isVerboseRuntimeStatsEnabled(session)),
                encryptionInformation);
        if (pageSource.isPresent()) {
            return Optional.of(pageSource.get());
        }
    }
    return Optional.empty();
}
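The rowExpressionCache parameter memoizes optimization of the layout's remaining predicate, keyed by expression and session, so splits from the same query do not re-optimize the same filter. A minimal sketch of how such a Guava LoadingCache could be built; optimizePredicate here is a hypothetical stand-in for the connector's real optimizer call:

import com.google.common.cache.CacheBuilder;
import com.google.common.cache.CacheLoader;
import com.google.common.cache.LoadingCache;

// Sketch only: optimizePredicate is illustrative, not part of the Presto codebase
LoadingCache<RowExpressionCacheKey, RowExpression> rowExpressionCache = CacheBuilder.newBuilder()
        .maximumSize(10_000)
        .build(CacheLoader.from(cacheKey -> optimizePredicate(cacheKey)));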
Use of com.facebook.presto.spi.ConnectorSession in project presto by prestodb.
From the class HivePartitionManager, method getPartitionsIterator:
public Iterable<HivePartition> getPartitionsIterator(SemiTransactionalHiveMetastore metastore, ConnectorTableHandle tableHandle, Constraint<ColumnHandle> constraint, ConnectorSession session)
{
    HiveTableHandle hiveTableHandle = (HiveTableHandle) tableHandle;
    TupleDomain<ColumnHandle> effectivePredicateColumnHandles = constraint.getSummary();
    SchemaTableName tableName = hiveTableHandle.getSchemaTableName();
    Table table = getTable(session, metastore, tableName, isOfflineDataDebugModeEnabled(session));
    List<HiveColumnHandle> partitionColumns = getPartitionKeyColumnHandles(table);
    List<Type> partitionTypes = partitionColumns.stream()
            .map(column -> typeManager.getType(column.getTypeSignature()))
            .collect(toList());
    Map<Column, Domain> effectivePredicate = createPartitionPredicates(metastore, session, effectivePredicateColumnHandles, partitionColumns, assumeCanonicalPartitionKeys);

    if (partitionColumns.isEmpty()) {
        // An unpartitioned table is modeled as a single partition
        return ImmutableList.of(new HivePartition(tableName));
    }
    // Returning a lambda keeps the listing lazy: the metastore is only queried when iteration starts
    return () -> {
        List<String> filteredPartitionNames = getFilteredPartitionNames(session, metastore, tableName, effectivePredicate);
        return filteredPartitionNames.stream()
                .map(partitionName -> parseValuesAndFilterPartition(tableName, partitionName, partitionColumns, partitionTypes, constraint))
                .filter(Optional::isPresent)
                .map(Optional::get)
                .iterator();
    };
}
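Because the non-empty branch returns a lambda implementing Iterable, getFilteredPartitionNames is not invoked until a caller actually starts iterating, so the potentially expensive metastore listing is deferred and runs once per iteration. A self-contained toy showing the same deferred-computation pattern, with plain strings standing in for partitions (all names illustrative):

import java.util.Arrays;
import java.util.List;

public class LazyIterableExample
{
    // Stand-in for getFilteredPartitionNames: pretend this is an expensive metastore call
    private static List<String> listPartitionNames()
    {
        System.out.println("metastore call");
        return Arrays.asList("ds=2020-01-01", "ds=2020-01-02");
    }

    public static void main(String[] args)
    {
        // Nothing happens yet: the lambda only captures the computation
        Iterable<String> partitions = () -> listPartitionNames().iterator();
        // The listing runs here, when iteration begins
        for (String partition : partitions) {
            System.out.println(partition);
        }
    }
}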
Use of com.facebook.presto.spi.ConnectorSession in project presto by prestodb.
From the class HivePartitionManager, method getPartitions:
public HivePartitionResult getPartitions(SemiTransactionalHiveMetastore metastore, ConnectorTableHandle tableHandle, List<List<String>> partitionValuesList, ConnectorSession session)
{
    HiveTableHandle hiveTableHandle = (HiveTableHandle) tableHandle;
    SchemaTableName tableName = hiveTableHandle.getSchemaTableName();
    Table table = getTable(session, metastore, tableName, isOfflineDataDebugModeEnabled(session));
    List<HiveColumnHandle> partitionColumns = getPartitionKeyColumnHandles(table);
    List<Type> partitionColumnTypes = partitionColumns.stream()
            .map(column -> typeManager.getType(column.getTypeSignature()))
            .collect(toImmutableList());

    List<HivePartition> partitionList = partitionValuesList.stream()
            .map(partitionValues -> makePartName(table.getPartitionColumns(), partitionValues))
            .map(partitionName -> parseValuesAndFilterPartition(tableName, partitionName, partitionColumns, partitionColumnTypes, alwaysTrue()))
            .map(partition -> partition.orElseThrow(() -> new VerifyException("partition must exist")))
            .collect(toImmutableList());

    Optional<HiveBucketHandle> bucketHandle = shouldIgnoreTableBucketing(session) ? Optional.empty() : getHiveBucketHandle(table);
    return new HivePartitionResult(partitionColumns, table.getDataColumns(), table.getParameters(), partitionList, TupleDomain.all(), TupleDomain.all(), TupleDomain.none(), bucketHandle, Optional.empty());
}
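makePartName joins the partition columns and the supplied values into Hive's standard partition-name form, for example ds=2020-01-01/country=US, which parseValuesAndFilterPartition then parses back into typed values. A rough stand-alone equivalent, assuming values need no escaping (the real Hive utility also escapes characters that are illegal in path segments; makePartitionName is an illustrative name):

import java.util.List;
import java.util.stream.Collectors;
import java.util.stream.IntStream;

public class PartitionNames
{
    // Hypothetical simplification of Hive's makePartName
    static String makePartitionName(List<String> columnNames, List<String> values)
    {
        return IntStream.range(0, columnNames.size())
                .mapToObj(i -> columnNames.get(i) + "=" + values.get(i))
                .collect(Collectors.joining("/"));
    }

    public static void main(String[] args)
    {
        // Prints ds=2020-01-01/country=US
        System.out.println(makePartitionName(List.of("ds", "country"), List.of("2020-01-01", "US")));
    }
}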
Use of com.facebook.presto.spi.ConnectorSession in project presto by prestodb.
From the class OrcSelectivePageSourceFactory, method toFilterFunctions:
/**
 * Split the filter expression into groups of conjuncts that depend on the same set of inputs,
 * then compile each group into a FilterFunction.
 */
private static List<FilterFunction> toFilterFunctions(RowExpression filter, Optional<BucketAdapter> bucketAdapter, ConnectorSession session, DeterminismEvaluator determinismEvaluator, PredicateCompiler predicateCompiler)
{
    ImmutableList.Builder<FilterFunction> filterFunctions = ImmutableList.builder();
    bucketAdapter.map(predicate -> new FilterFunction(session.getSqlFunctionProperties(), true, predicate))
            .ifPresent(filterFunctions::add);
    if (TRUE_CONSTANT.equals(filter)) {
        return filterFunctions.build();
    }

    DynamicFilterExtractResult extractDynamicFilterResult = extractDynamicFilters(filter);
    // dynamic filters will be added through subfield pushdown
    filter = and(extractDynamicFilterResult.getStaticConjuncts());

    if (!isAdaptiveFilterReorderingEnabled(session)) {
        filterFunctions.add(new FilterFunction(
                session.getSqlFunctionProperties(),
                determinismEvaluator.isDeterministic(filter),
                predicateCompiler.compilePredicate(session.getSqlFunctionProperties(), session.getSessionFunctions(), filter).get()));
        return filterFunctions.build();
    }

    List<RowExpression> conjuncts = extractConjuncts(filter);
    if (conjuncts.size() == 1) {
        filterFunctions.add(new FilterFunction(
                session.getSqlFunctionProperties(),
                determinismEvaluator.isDeterministic(filter),
                predicateCompiler.compilePredicate(session.getSqlFunctionProperties(), session.getSessionFunctions(), filter).get()));
        return filterFunctions.build();
    }

    // Use LinkedHashMap to preserve the user-specified order of conjuncts.
    // This will be the initial order in which filters are applied.
    Map<Set<Integer>, List<RowExpression>> inputsToConjuncts = new LinkedHashMap<>();
    for (RowExpression conjunct : conjuncts) {
        inputsToConjuncts.computeIfAbsent(extractInputs(conjunct), k -> new ArrayList<>()).add(conjunct);
    }
    inputsToConjuncts.values().stream()
            .map(expressions -> binaryExpression(AND, expressions))
            .map(predicate -> new FilterFunction(
                    session.getSqlFunctionProperties(),
                    determinismEvaluator.isDeterministic(predicate),
                    predicateCompiler.compilePredicate(session.getSqlFunctionProperties(), session.getSessionFunctions(), predicate).get()))
            .forEach(filterFunctions::add);
    return filterFunctions.build();
}
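The LinkedHashMap grouping above is the step the javadoc describes: conjuncts that read the same input channels are compiled together, so each FilterFunction touches a minimal set of columns and the groups can later be reordered independently. A stand-alone toy version of the same grouping, with strings for conjuncts and integer sets for the channels they read (all names illustrative):

import java.util.ArrayList;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;

public class ConjunctGrouping
{
    public static void main(String[] args)
    {
        // Toy conjuncts tagged with the input channels they reference
        Map<String, Set<Integer>> inputsOf = Map.of(
                "a > 1", Set.of(0),
                "a < 10", Set.of(0),
                "b = 'x'", Set.of(1));

        // LinkedHashMap preserves first-seen order, matching the original code's intent
        Map<Set<Integer>, List<String>> groups = new LinkedHashMap<>();
        for (String conjunct : List.of("a > 1", "a < 10", "b = 'x'")) {
            groups.computeIfAbsent(inputsOf.get(conjunct), k -> new ArrayList<>()).add(conjunct);
        }
        // Prints {[0]=[a > 1, a < 10], [1]=[b = 'x']}
        System.out.println(groups);
    }
}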
Use of com.facebook.presto.spi.ConnectorSession in project presto by prestodb.
From the class RcFilePageSourceFactory, method createPageSource:
@Override
public Optional<? extends ConnectorPageSource> createPageSource(
        Configuration configuration,
        ConnectorSession session,
        Path path,
        long start,
        long length,
        long fileSize,
        Storage storage,
        SchemaTableName tableName,
        Map<String, String> tableParameters,
        List<HiveColumnHandle> columns,
        TupleDomain<HiveColumnHandle> effectivePredicate,
        DateTimeZone hiveStorageTimeZone,
        HiveFileContext hiveFileContext,
        Optional<EncryptionInformation> encryptionInformation)
{
    if (!columns.isEmpty() && columns.stream().allMatch(hiveColumnHandle -> hiveColumnHandle.getColumnType() == AGGREGATED)) {
        throw new UnsupportedOperationException("Partial aggregation pushdown only supported for ORC/Parquet files. " +
                "Table " + tableName.toString() + " has file (" + path.toString() + ") of format " + storage.getStorageFormat().getOutputFormat() +
                ". Set session property hive.pushdown_partial_aggregations_into_scan=false and execute query again");
    }

    RcFileEncoding rcFileEncoding;
    if (LazyBinaryColumnarSerDe.class.getName().equals(storage.getStorageFormat().getSerDe())) {
        rcFileEncoding = new BinaryRcFileEncoding();
    }
    else if (ColumnarSerDe.class.getName().equals(storage.getStorageFormat().getSerDe())) {
        rcFileEncoding = createTextVectorEncoding(getHiveSchema(storage.getSerdeParameters(), tableParameters), hiveStorageTimeZone);
    }
    else {
        // Not an RCFile SerDe; let another page source factory handle this split
        return Optional.empty();
    }

    if (fileSize == 0) {
        throw new PrestoException(HIVE_BAD_DATA, "RCFile is empty: " + path);
    }

    FSDataInputStream inputStream;
    try {
        inputStream = hdfsEnvironment.getFileSystem(session.getUser(), path, configuration).openFile(path, hiveFileContext);
    }
    catch (Exception e) {
        if (nullToEmpty(e.getMessage()).trim().equals("Filesystem closed") || e instanceof FileNotFoundException) {
            throw new PrestoException(HIVE_CANNOT_OPEN_SPLIT, e);
        }
        throw new PrestoException(HIVE_CANNOT_OPEN_SPLIT, splitError(e, path, start, length), e);
    }

    try {
        ImmutableMap.Builder<Integer, Type> readColumns = ImmutableMap.builder();
        for (HiveColumnHandle column : columns) {
            readColumns.put(column.getHiveColumnIndex(), column.getHiveType().getType(typeManager));
        }
        RcFileReader rcFileReader = new RcFileReader(
                new HdfsRcFileDataSource(path.toString(), inputStream, fileSize, stats),
                rcFileEncoding,
                readColumns.build(),
                new AircompressorCodecFactory(new HadoopCodecFactory(configuration.getClassLoader())),
                start,
                length,
                new DataSize(8, Unit.MEGABYTE));
        return Optional.of(new RcFilePageSource(rcFileReader, columns, typeManager));
    }
    catch (Throwable e) {
        try {
            inputStream.close();
        }
        catch (IOException ignored) {
        }
        if (e instanceof PrestoException) {
            throw (PrestoException) e;
        }
        String message = splitError(e, path, start, length);
        if (e instanceof RcFileCorruptionException) {
            throw new PrestoException(HIVE_BAD_DATA, message, e);
        }
        if (e.getClass().getSimpleName().equals("BlockMissingException")) {
            throw new PrestoException(HIVE_MISSING_DATA, message, e);
        }
        throw new PrestoException(HIVE_CANNOT_OPEN_SPLIT, message, e);
    }
}
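One detail in the catch block is worth noting: BlockMissingException is matched by simple class name rather than with instanceof, presumably so the factory does not need a compile-time dependency on the HDFS client class. The pattern in isolation (isBlockMissing is an illustrative name, not a method of the original class):

// Detect an exception type by name so the throwing library's class
// does not have to be on the compile-time classpath
static boolean isBlockMissing(Throwable t)
{
    return "BlockMissingException".equals(t.getClass().getSimpleName());
}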