use of com.facebook.presto.spi.predicate.TupleDomain in project presto by prestodb.
the class ParquetHiveRecordCursor method createParquetRecordReader.
private ParquetRecordReader<FakeParquetRecord> createParquetRecordReader(HdfsEnvironment hdfsEnvironment, String sessionUser, Configuration configuration, Path path, long start, long length, List<HiveColumnHandle> columns, boolean useParquetColumnNames, TypeManager typeManager, boolean predicatePushdownEnabled, TupleDomain<HiveColumnHandle> effectivePredicate) {
ParquetDataSource dataSource = null;
try {
FileSystem fileSystem = hdfsEnvironment.getFileSystem(sessionUser, path, configuration);
dataSource = buildHdfsParquetDataSource(fileSystem, path, start, length);
ParquetMetadata parquetMetadata = hdfsEnvironment.doAs(sessionUser, () -> ParquetFileReader.readFooter(configuration, path, NO_FILTER));
List<BlockMetaData> blocks = parquetMetadata.getBlocks();
FileMetaData fileMetaData = parquetMetadata.getFileMetaData();
MessageType fileSchema = fileMetaData.getSchema();
PrestoReadSupport readSupport = new PrestoReadSupport(useParquetColumnNames, columns, fileSchema);
List<parquet.schema.Type> fields = columns.stream().filter(column -> column.getColumnType() == REGULAR).map(column -> getParquetType(column, fileSchema, useParquetColumnNames)).filter(Objects::nonNull).collect(toList());
MessageType requestedSchema = new MessageType(fileSchema.getName(), fields);
LongArrayList offsets = new LongArrayList(blocks.size());
for (BlockMetaData block : blocks) {
long firstDataPage = block.getColumns().get(0).getFirstDataPageOffset();
if (firstDataPage >= start && firstDataPage < start + length) {
if (predicatePushdownEnabled) {
ParquetPredicate parquetPredicate = buildParquetPredicate(columns, effectivePredicate, fileMetaData.getSchema(), typeManager);
if (predicateMatches(parquetPredicate, block, dataSource, requestedSchema, effectivePredicate)) {
offsets.add(block.getStartingPos());
}
} else {
offsets.add(block.getStartingPos());
}
}
}
ParquetInputSplit split = new ParquetInputSplit(path, start, start + length, length, null, offsets.toLongArray());
TaskAttemptContext taskContext = ContextUtil.newTaskAttemptContext(configuration, new TaskAttemptID());
return hdfsEnvironment.doAs(sessionUser, () -> {
ParquetRecordReader<FakeParquetRecord> realReader = new PrestoParquetRecordReader(readSupport);
realReader.initialize(split, taskContext);
return realReader;
});
} catch (Exception e) {
Throwables.propagateIfInstanceOf(e, PrestoException.class);
if (e instanceof InterruptedException) {
Thread.currentThread().interrupt();
throw Throwables.propagate(e);
}
String message = format("Error opening Hive split %s (offset=%s, length=%s): %s", path, start, length, e.getMessage());
if (e.getClass().getSimpleName().equals("BlockMissingException")) {
throw new PrestoException(HIVE_MISSING_DATA, message, e);
}
throw new PrestoException(HIVE_CANNOT_OPEN_SPLIT, message, e);
} finally {
if (dataSource != null) {
try {
dataSource.close();
} catch (IOException ignored) {
}
}
}
}
use of com.facebook.presto.spi.predicate.TupleDomain in project presto by prestodb.
the class ParquetPageSourceFactory method createParquetPageSource.
public static ParquetPageSource createParquetPageSource(HdfsEnvironment hdfsEnvironment, String user, Configuration configuration, Path path, long start, long length, Properties schema, List<HiveColumnHandle> columns, boolean useParquetColumnNames, TypeManager typeManager, boolean predicatePushdownEnabled, TupleDomain<HiveColumnHandle> effectivePredicate) {
AggregatedMemoryContext systemMemoryContext = new AggregatedMemoryContext();
ParquetDataSource dataSource = null;
try {
FileSystem fileSystem = hdfsEnvironment.getFileSystem(user, path, configuration);
dataSource = buildHdfsParquetDataSource(fileSystem, path, start, length);
ParquetMetadata parquetMetadata = ParquetMetadataReader.readFooter(fileSystem, path);
FileMetaData fileMetaData = parquetMetadata.getFileMetaData();
MessageType fileSchema = fileMetaData.getSchema();
List<parquet.schema.Type> fields = columns.stream().filter(column -> column.getColumnType() == REGULAR).map(column -> getParquetType(column, fileSchema, useParquetColumnNames)).filter(Objects::nonNull).collect(toList());
MessageType requestedSchema = new MessageType(fileSchema.getName(), fields);
List<BlockMetaData> blocks = new ArrayList<>();
for (BlockMetaData block : parquetMetadata.getBlocks()) {
long firstDataPage = block.getColumns().get(0).getFirstDataPageOffset();
if (firstDataPage >= start && firstDataPage < start + length) {
blocks.add(block);
}
}
if (predicatePushdownEnabled) {
ParquetPredicate parquetPredicate = buildParquetPredicate(columns, effectivePredicate, fileMetaData.getSchema(), typeManager);
final ParquetDataSource finalDataSource = dataSource;
blocks = blocks.stream().filter(block -> predicateMatches(parquetPredicate, block, finalDataSource, requestedSchema, effectivePredicate)).collect(toList());
}
ParquetReader parquetReader = new ParquetReader(fileSchema, requestedSchema, blocks, dataSource, typeManager, systemMemoryContext);
return new ParquetPageSource(parquetReader, dataSource, fileSchema, requestedSchema, length, schema, columns, effectivePredicate, typeManager, useParquetColumnNames, systemMemoryContext);
} catch (Exception e) {
try {
if (dataSource != null) {
dataSource.close();
}
} catch (IOException ignored) {
}
if (e instanceof PrestoException) {
throw (PrestoException) e;
}
String message = format("Error opening Hive split %s (offset=%s, length=%s): %s", path, start, length, e.getMessage());
if (e.getClass().getSimpleName().equals("BlockMissingException")) {
throw new PrestoException(HIVE_MISSING_DATA, message, e);
}
throw new PrestoException(HIVE_CANNOT_OPEN_SPLIT, message, e);
}
}
use of com.facebook.presto.spi.predicate.TupleDomain in project presto by prestodb.
the class TestDomainTranslator method testToPredicateAllIgnored.
@Test
public void testToPredicateAllIgnored() throws Exception {
TupleDomain<Symbol> tupleDomain = withColumnDomains(ImmutableMap.<Symbol, Domain>builder().put(C_BIGINT, Domain.singleValue(BIGINT, 1L)).put(C_DOUBLE, Domain.onlyNull(DOUBLE)).put(C_VARCHAR, Domain.notNull(VARCHAR)).put(C_BOOLEAN, Domain.all(BOOLEAN)).build());
ExtractionResult result = fromPredicate(toPredicate(tupleDomain));
assertEquals(result.getRemainingExpression(), TRUE_LITERAL);
assertEquals(result.getTupleDomain(), withColumnDomains(ImmutableMap.<Symbol, Domain>builder().put(C_BIGINT, Domain.singleValue(BIGINT, 1L)).put(C_DOUBLE, Domain.onlyNull(DOUBLE)).put(C_VARCHAR, Domain.notNull(VARCHAR)).build()));
}
use of com.facebook.presto.spi.predicate.TupleDomain in project presto by prestodb.
the class ShardPredicate method create.
public static ShardPredicate create(TupleDomain<RaptorColumnHandle> tupleDomain, boolean bucketed) {
StringJoiner predicate = new StringJoiner(" AND ").setEmptyValue("true");
ImmutableList.Builder<JDBCType> types = ImmutableList.builder();
ImmutableList.Builder<Object> values = ImmutableList.builder();
for (Entry<RaptorColumnHandle, Domain> entry : tupleDomain.getDomains().get().entrySet()) {
Domain domain = entry.getValue();
if (domain.isNullAllowed() || domain.isAll()) {
continue;
}
RaptorColumnHandle handle = entry.getKey();
Type type = handle.getColumnType();
JDBCType jdbcType = jdbcType(type);
if (jdbcType == null) {
continue;
}
if (handle.isShardUuid()) {
predicate.add(createShardPredicate(types, values, domain, jdbcType));
continue;
}
if (!domain.getType().isOrderable()) {
continue;
}
Ranges ranges = domain.getValues().getRanges();
// TODO: support multiple ranges
if (ranges.getRangeCount() != 1) {
continue;
}
Range range = getOnlyElement(ranges.getOrderedRanges());
Object minValue = null;
Object maxValue = null;
if (range.isSingleValue()) {
minValue = range.getSingleValue();
maxValue = range.getSingleValue();
} else {
if (!range.getLow().isLowerUnbounded()) {
minValue = range.getLow().getValue();
}
if (!range.getHigh().isUpperUnbounded()) {
maxValue = range.getHigh().getValue();
}
}
String min;
String max;
if (handle.isBucketNumber()) {
if (!bucketed) {
predicate.add("false");
continue;
}
min = "bucket_number";
max = "bucket_number";
} else {
min = minColumn(handle.getColumnId());
max = maxColumn(handle.getColumnId());
}
if (minValue != null) {
predicate.add(format("(%s >= ? OR %s IS NULL)", max, max));
types.add(jdbcType);
values.add(minValue);
}
if (maxValue != null) {
predicate.add(format("(%s <= ? OR %s IS NULL)", min, min));
types.add(jdbcType);
values.add(maxValue);
}
}
return new ShardPredicate(predicate.toString(), types.build(), values.build());
}
use of com.facebook.presto.spi.predicate.TupleDomain in project presto by prestodb.
the class ShardMetadataRecordCursor method getTableIds.
@VisibleForTesting
static Iterator<Long> getTableIds(IDBI dbi, TupleDomain<Integer> tupleDomain) {
Map<Integer, Domain> domains = tupleDomain.getDomains().get();
Domain schemaNameDomain = domains.get(getColumnIndex(SHARD_METADATA, SCHEMA_NAME));
Domain tableNameDomain = domains.get(getColumnIndex(SHARD_METADATA, TABLE_NAME));
List<String> values = new ArrayList<>();
StringBuilder sql = new StringBuilder("SELECT table_id FROM tables ");
if (schemaNameDomain != null || tableNameDomain != null) {
sql.append("WHERE ");
List<String> predicates = new ArrayList<>();
if (tableNameDomain != null && tableNameDomain.isSingleValue()) {
predicates.add("table_name = ?");
values.add(getStringValue(tableNameDomain.getSingleValue()));
}
if (schemaNameDomain != null && schemaNameDomain.isSingleValue()) {
predicates.add("schema_name = ?");
values.add(getStringValue(schemaNameDomain.getSingleValue()));
}
sql.append(Joiner.on(" AND ").join(predicates));
}
ImmutableList.Builder<Long> tableIds = ImmutableList.builder();
try (Connection connection = dbi.open().getConnection();
PreparedStatement statement = connection.prepareStatement(sql.toString())) {
for (int i = 0; i < values.size(); i++) {
statement.setString(i + 1, values.get(i));
}
try (ResultSet resultSet = statement.executeQuery()) {
while (resultSet.next()) {
tableIds.add(resultSet.getLong("table_id"));
}
}
} catch (SQLException | DBIException e) {
throw metadataError(e);
}
return tableIds.build().iterator();
}
Aggregations