Use of com.facebook.presto.common.predicate.TupleDomain in project presto by prestodb.
The class JmxSplitManager, method getSplits.
@Override
public ConnectorSplitSource getSplits(ConnectorTransactionHandle transaction, ConnectorSession session, ConnectorTableLayoutHandle layout, SplitSchedulingContext splitSchedulingContext)
{
    JmxTableLayoutHandle jmxLayout = (JmxTableLayoutHandle) layout;
    JmxTableHandle tableHandle = jmxLayout.getTable();
    TupleDomain<ColumnHandle> predicate = jmxLayout.getConstraint();

    // TODO is there a better way to get the node column?
    Optional<JmxColumnHandle> nodeColumnHandle = tableHandle.getColumnHandles().stream()
            .filter(jmxColumnHandle -> jmxColumnHandle.getColumnName().equals(NODE_COLUMN_NAME))
            .findFirst();
    checkState(nodeColumnHandle.isPresent(), "Failed to find %s column", NODE_COLUMN_NAME);

    List<ConnectorSplit> splits = nodeManager.getAllNodes().stream()
            .filter(node -> {
                NullableValue value = NullableValue.of(createUnboundedVarcharType(), utf8Slice(node.getNodeIdentifier()));
                return predicate.overlaps(fromFixedValues(ImmutableMap.of(nodeColumnHandle.get(), value)));
            })
            .map(node -> new JmxSplit(tableHandle, ImmutableList.of(node.getHostAndPort())))
            .collect(toList());
    return new FixedSplitSource(splits);
}
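The pruning step above hinges on TupleDomain.overlaps: each node's identifier is wrapped in a single-value TupleDomain and tested against the layout constraint, so splits are only created for nodes the predicate can match. A minimal sketch of that idea, assuming a hypothetical node column handle and worker identifiers (names are illustrative, not from the original snippet; createUnboundedVarcharType and utf8Slice are the same static imports the original uses):

    // Constraint from the query, e.g. WHERE node = 'worker-1'
    TupleDomain<ColumnHandle> constraint = TupleDomain.withColumnDomains(ImmutableMap.of(
            nodeColumn, Domain.singleValue(createUnboundedVarcharType(), utf8Slice("worker-1"))));

    // Candidate node being considered for a split
    NullableValue candidate = NullableValue.of(createUnboundedVarcharType(), utf8Slice("worker-2"));
    boolean keep = constraint.overlaps(TupleDomain.fromFixedValues(ImmutableMap.of(nodeColumn, candidate)));
    // keep is false here, so no JmxSplit would be created for worker-2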
Use of com.facebook.presto.common.predicate.TupleDomain in project presto by prestodb.
The class IcebergPageSourceProvider, method createParquetPageSource.
private static ConnectorPageSource createParquetPageSource(
        HdfsEnvironment hdfsEnvironment,
        String user,
        Configuration configuration,
        Path path,
        long start,
        long length,
        SchemaTableName tableName,
        List<IcebergColumnHandle> regularColumns,
        boolean useParquetColumnNames,
        DataSize maxReadBlockSize,
        boolean batchReaderEnabled,
        boolean verificationEnabled,
        TupleDomain<IcebergColumnHandle> effectivePredicate,
        FileFormatDataSourceStats fileFormatDataSourceStats,
        boolean columnIndexFilterEnabled)
{
    AggregatedMemoryContext systemMemoryContext = newSimpleAggregatedMemoryContext();
    ParquetDataSource dataSource = null;
    try {
        ExtendedFileSystem fileSystem = hdfsEnvironment.getFileSystem(user, path, configuration);
        FileStatus fileStatus = fileSystem.getFileStatus(path);
        long fileSize = fileStatus.getLen();
        long modificationTime = fileStatus.getModificationTime();
        HiveFileContext hiveFileContext = new HiveFileContext(true, NO_CACHE_CONSTRAINTS, Optional.empty(), Optional.of(fileSize), modificationTime, false);
        FSDataInputStream inputStream = fileSystem.openFile(path, hiveFileContext);
        dataSource = buildHdfsParquetDataSource(inputStream, path, fileFormatDataSourceStats);
        ParquetMetadata parquetMetadata = MetadataReader.readFooter(dataSource, fileSize).getParquetMetadata();
        FileMetaData fileMetaData = parquetMetadata.getFileMetaData();
        MessageType fileSchema = fileMetaData.getSchema();

        // Mapping from Iceberg field ID to Parquet fields.
        Map<Integer, org.apache.parquet.schema.Type> parquetIdToField = fileSchema.getFields().stream()
                .filter(field -> field.getId() != null)
                .collect(toImmutableMap(field -> field.getId().intValue(), Function.identity()));

        List<org.apache.parquet.schema.Type> parquetFields = regularColumns.stream()
                .map(column -> {
                    if (parquetIdToField.isEmpty()) {
                        // This is a migrated table
                        return getParquetTypeByName(column.getName(), fileSchema);
                    }
                    return parquetIdToField.get(column.getId());
                })
                .collect(toList());

        // TODO: support subfield pushdown
        MessageType requestedSchema = new MessageType(fileSchema.getName(), parquetFields.stream().filter(Objects::nonNull).collect(toImmutableList()));
        Map<List<String>, RichColumnDescriptor> descriptorsByPath = getDescriptors(fileSchema, requestedSchema);
        TupleDomain<ColumnDescriptor> parquetTupleDomain = getParquetTupleDomain(descriptorsByPath, effectivePredicate);
        Predicate parquetPredicate = buildPredicate(requestedSchema, parquetTupleDomain, descriptorsByPath);
        final ParquetDataSource finalDataSource = dataSource;
        List<BlockMetaData> blocks = new ArrayList<>();
        List<ColumnIndexStore> blockIndexStores = new ArrayList<>();
        for (BlockMetaData block : parquetMetadata.getBlocks()) {
            long firstDataPage = block.getColumns().get(0).getFirstDataPageOffset();
            Optional<ColumnIndexStore> columnIndexStore = ColumnIndexFilterUtils.getColumnIndexStore(parquetPredicate, finalDataSource, block, descriptorsByPath, columnIndexFilterEnabled);
            if ((firstDataPage >= start) && (firstDataPage < (start + length)) && predicateMatches(parquetPredicate, block, dataSource, descriptorsByPath, parquetTupleDomain, columnIndexStore, columnIndexFilterEnabled)) {
                blocks.add(block);
                blockIndexStores.add(columnIndexStore.orElse(null));
            }
        }
        MessageColumnIO messageColumnIO = getColumnIO(fileSchema, requestedSchema);
        ParquetReader parquetReader = new ParquetReader(messageColumnIO, blocks, dataSource, systemMemoryContext, maxReadBlockSize, batchReaderEnabled, verificationEnabled, parquetPredicate, blockIndexStores, columnIndexFilterEnabled);
        ImmutableList.Builder<String> namesBuilder = ImmutableList.builder();
        ImmutableList.Builder<Type> prestoTypes = ImmutableList.builder();
        ImmutableList.Builder<Optional<Field>> internalFields = ImmutableList.builder();
        for (int columnIndex = 0; columnIndex < regularColumns.size(); columnIndex++) {
            IcebergColumnHandle column = regularColumns.get(columnIndex);
            namesBuilder.add(column.getName());
            org.apache.parquet.schema.Type parquetField = parquetFields.get(columnIndex);
            Type prestoType = column.getType();
            prestoTypes.add(prestoType);
            if (parquetField == null) {
                internalFields.add(Optional.empty());
            }
            else {
                internalFields.add(constructField(column.getType(), messageColumnIO.getChild(parquetField.getName())));
            }
        }
        return new ParquetPageSource(parquetReader, prestoTypes.build(), internalFields.build(), namesBuilder.build(), new RuntimeStats());
    }
    catch (Exception e) {
        try {
            if (dataSource != null) {
                dataSource.close();
            }
        }
        catch (IOException ignored) {
        }
        if (e instanceof PrestoException) {
            throw (PrestoException) e;
        }
        String message = format("Error opening Iceberg split %s (offset=%s, length=%s): %s", path, start, length, e.getMessage());
        if (e instanceof ParquetCorruptionException) {
            throw new PrestoException(ICEBERG_BAD_DATA, message, e);
        }
        if (e instanceof BlockMissingException) {
            throw new PrestoException(ICEBERG_MISSING_DATA, message, e);
        }
        throw new PrestoException(ICEBERG_CANNOT_OPEN_SPLIT, message, e);
    }
}
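Two TupleDomain-driven steps do the pruning here: getParquetTupleDomain translates the Iceberg predicate into a domain over Parquet column descriptors, and the row-group loop keeps only blocks that start inside the split and whose statistics can satisfy the predicate. A stripped-down sketch of that loop, with the statistics check abstracted behind a java.util.function.Predicate as a hypothetical stand-in for predicateMatches (the helper name and signature are illustrative, not part of the original):

    static List<BlockMetaData> selectRowGroups(
            List<BlockMetaData> rowGroups,
            long start,
            long length,
            java.util.function.Predicate<BlockMetaData> statisticsMatch)
    {
        List<BlockMetaData> selected = new ArrayList<>();
        for (BlockMetaData block : rowGroups) {
            // a row group belongs to this split if its first data page starts inside [start, start + length)
            long firstDataPage = block.getColumns().get(0).getFirstDataPageOffset();
            boolean inSplit = firstDataPage >= start && firstDataPage < start + length;
            if (inSplit && statisticsMatch.test(block)) {
                selected.add(block);
            }
        }
        return selected;
    }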
Use of com.facebook.presto.common.predicate.TupleDomain in project presto by prestodb.
The class IcebergUtil, method getTableScan.
public static TableScan getTableScan(TupleDomain<IcebergColumnHandle> predicates, Optional<Long> snapshotId, Table icebergTable)
{
    Expression expression = ExpressionConverter.toIcebergExpression(predicates);
    TableScan tableScan = icebergTable.newScan().filter(expression);
    return snapshotId
            .map(id -> isSnapshot(icebergTable, id) ? tableScan.useSnapshot(id) : tableScan.asOfTime(id))
            .orElse(tableScan);
}
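Usage sketch, assuming a hypothetical IcebergColumnHandle for a BIGINT column named id and an already-loaded icebergTable (neither appears in the original snippet): the TupleDomain is converted to an Iceberg Expression, so the resulting scan only plans files whose partition data can satisfy the predicate.

    TupleDomain<IcebergColumnHandle> predicate = TupleDomain.withColumnDomains(ImmutableMap.of(
            idColumn, Domain.singleValue(BIGINT, 42L)));
    TableScan scan = getTableScan(predicate, Optional.empty(), icebergTable);
    // planFiles() now returns only the data files that could contain rows with id = 42
    scan.planFiles().forEach(task -> System.out.println(task.file().path()));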
Use of com.facebook.presto.common.predicate.TupleDomain in project presto by prestodb.
The class CassandraPartitionManager, method getPartitionKeysList.
private static List<Set<Object>> getPartitionKeysList(CassandraTable table, TupleDomain<ColumnHandle> tupleDomain)
{
    ImmutableList.Builder<Set<Object>> partitionColumnValues = ImmutableList.builder();
    for (CassandraColumnHandle columnHandle : table.getPartitionKeyColumns()) {
        Domain domain = tupleDomain.getDomains().get().get(columnHandle);

        // if there is no constraint on a partition key, return an empty set
        if (domain == null) {
            return ImmutableList.of();
        }

        // todo does cassandra allow null partition keys?
        if (domain.isNullAllowed()) {
            return ImmutableList.of();
        }

        Set<Object> values = domain.getValues().getValuesProcessor().transform(
                ranges -> {
                    ImmutableSet.Builder<Object> columnValues = ImmutableSet.builder();
                    for (Range range : ranges.getOrderedRanges()) {
                        // if the range is not a single value, we cannot perform partition pruning
                        if (!range.isSingleValue()) {
                            return ImmutableSet.of();
                        }
                        Object value = range.getSingleValue();
                        CassandraType valueType = columnHandle.getCassandraType();
                        if (valueType.isSupportedPartitionKey()) {
                            columnValues.add(value);
                        }
                    }
                    return columnValues.build();
                },
                discreteValues -> {
                    if (discreteValues.isWhiteList()) {
                        return ImmutableSet.copyOf(discreteValues.getValues());
                    }
                    return ImmutableSet.of();
                },
                allOrNone -> ImmutableSet.of());
        partitionColumnValues.add(values);
    }
    return partitionColumnValues.build();
}
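The transform call above is what decides whether pruning is possible: a domain built from exact values yields a usable value set, while a domain expressed as an open range falls through to the empty set. A small sketch of the two cases, assuming a BIGINT partition key with hypothetical values, built with the same predicate classes (Domain, ValueSet, Range):

    // Exact values, e.g. id IN (1, 2, 3) -> usable partition key values {1, 2, 3}
    Domain exact = Domain.multipleValues(BIGINT, ImmutableList.of(1L, 2L, 3L));

    // Open range, e.g. id > 10 -> not single values, so the ranges lambda returns an empty set (no pruning)
    Domain range = Domain.create(ValueSet.ofRanges(Range.greaterThan(BIGINT, 10L)), false);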
Use of com.facebook.presto.common.predicate.TupleDomain in project presto by prestodb.
The class KuduClientSession, method buildKuduSplits.
public List<KuduSplit> buildKuduSplits(KuduTableLayoutHandle layoutHandle)
{
    reTryKerberos(kerberosAuthEnabled);
    KuduTableHandle tableHandle = layoutHandle.getTableHandle();
    KuduTable table = tableHandle.getTable(this);
    final int primaryKeyColumnCount = table.getSchema().getPrimaryKeyColumnCount();
    KuduScanToken.KuduScanTokenBuilder builder = client.newScanTokenBuilder(table);

    TupleDomain<ColumnHandle> constraintSummary = layoutHandle.getConstraintSummary();
    if (!addConstraintPredicates(table, builder, constraintSummary)) {
        return ImmutableList.of();
    }

    Optional<Set<ColumnHandle>> desiredColumns = layoutHandle.getDesiredColumns();
    if (desiredColumns.isPresent()) {
        if (desiredColumns.get().contains(KuduColumnHandle.ROW_ID_HANDLE)) {
            List<Integer> columnIndexes = IntStream.range(0, primaryKeyColumnCount).boxed().collect(Collectors.toList());
            for (ColumnHandle columnHandle : desiredColumns.get()) {
                if (columnHandle instanceof KuduColumnHandle) {
                    KuduColumnHandle k = (KuduColumnHandle) columnHandle;
                    int index = k.getOrdinalPosition();
                    if (index >= primaryKeyColumnCount) {
                        columnIndexes.add(index);
                    }
                }
            }
            builder.setProjectedColumnIndexes(columnIndexes);
        }
        else {
            List<Integer> columnIndexes = desiredColumns.get().stream()
                    .map(handle -> ((KuduColumnHandle) handle).getOrdinalPosition())
                    .collect(toImmutableList());
            builder.setProjectedColumnIndexes(columnIndexes);
        }
    }

    List<KuduScanToken> tokens = builder.build();
    return tokens.stream()
            .map(token -> toKuduSplit(tableHandle, token, primaryKeyColumnCount))
            .collect(toImmutableList());
}
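addConstraintPredicates (not shown above) is where the TupleDomain is consumed: when it returns false the constraint summary is unsatisfiable and no splits are produced; otherwise each column domain is pushed into the scan token builder as a Kudu predicate. A minimal sketch of that push-down idea for a single-value domain, assuming a BIGINT column named id (names and value are illustrative, not taken from the original method):

    ColumnSchema idColumn = table.getSchema().getColumn("id");
    // an equality constraint such as id = 42 maps onto a Kudu comparison predicate
    KuduPredicate idEquals = KuduPredicate.newComparisonPredicate(idColumn, KuduPredicate.ComparisonOp.EQUAL, 42L);
    builder.addPredicate(idEquals);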