use of com.facebook.presto.common.Subfield in project presto by prestodb.
the class TestSelectiveOrcReader method testHiddenConstantColumns.
@Test
public void testHiddenConstantColumns() throws Exception {
Type type = BIGINT;
List<Type> types = ImmutableList.of(type);
List<List<?>> values = ImmutableList.of(ImmutableList.of(1L, 2L));
TempFile tempFile = new TempFile();
writeOrcColumnsPresto(tempFile.getFile(), DWRF, ZSTD, Optional.empty(), types, values, new OrcWriterStats());
// Hidden columns like partition columns use negative indices (-13).
int hiddenColumnIndex = -13;
Map<Integer, Type> includedColumns = ImmutableMap.of(hiddenColumnIndex, VARCHAR, 0, BIGINT);
List<Integer> outputColumns = ImmutableList.of(hiddenColumnIndex, 0);
Slice constantSlice = Slices.utf8Slice("partition_value");
Map<Integer, Object> constantValues = ImmutableMap.of(hiddenColumnIndex, constantSlice);
OrcAggregatedMemoryContext systemMemoryUsage = new TestingHiveOrcAggregatedMemoryContext();
TupleDomainFilter filter = BigintRange.of(1, 1, false);
Map<Subfield, TupleDomainFilter> subFieldFilter = toSubfieldFilter(filter);
OrcReaderSettings readerSettings = OrcTester.OrcReaderSettings.builder().setColumnFilters(ImmutableMap.of(0, subFieldFilter)).build();
try (OrcSelectiveRecordReader recordReader = createCustomOrcSelectiveRecordReader(tempFile.getFile(), DWRF.getOrcEncoding(), OrcPredicate.TRUE, types, 1, readerSettings.getColumnFilters(), readerSettings.getFilterFunctions(), readerSettings.getFilterFunctionInputMapping(), readerSettings.getRequiredSubfields(), constantValues, ImmutableMap.of(), includedColumns, outputColumns, false, systemMemoryUsage, false)) {
Page page = recordReader.getNextPage();
assertEquals(page.getPositionCount(), 1);
Block partitionValueBlock = page.getBlock(0);
int length = partitionValueBlock.getSliceLength(0);
Slice varcharSlice = partitionValueBlock.getSlice(0, 0, length);
assertEquals(varcharSlice, constantSlice);
Block bigintBlock = page.getBlock(1);
assertEquals(bigintBlock.getLong(0), 1);
assertNull(recordReader.getNextPage());
}
}
use of com.facebook.presto.common.Subfield in project presto by prestodb.
the class TestMapFlatSelectiveStreamReader method runTest.
private <K, V> void runTest(String testOrcFileName, Type keyType, Type valueType, ExpectedValuesBuilder<K, V> expectedValuesBuilder) throws Exception {
List<Map<K, V>> expectedValues = expectedValuesBuilder.build();
Type mapType = mapType(keyType, valueType);
OrcPredicate orcPredicate = createOrcPredicate(0, mapType, expectedValues, OrcTester.Format.DWRF, true);
runTest(testOrcFileName, mapType, expectedValues, orcPredicate, Optional.empty(), ImmutableList.of());
runTest(testOrcFileName, mapType, expectedValues.stream().filter(Objects::isNull).collect(toList()), orcPredicate, Optional.of(IS_NULL), ImmutableList.of());
runTest(testOrcFileName, mapType, expectedValues.stream().filter(Objects::nonNull).collect(toList()), orcPredicate, Optional.of(IS_NOT_NULL), ImmutableList.of());
if (keyType != VARBINARY) {
// read only some keys
List<K> keys = expectedValues.stream().filter(Objects::nonNull).flatMap(v -> v.keySet().stream()).distinct().collect(toImmutableList());
if (!keys.isEmpty()) {
List<K> requiredKeys = ImmutableList.of(keys.get(0));
runTest(testOrcFileName, mapType, pruneMaps(expectedValues, requiredKeys), orcPredicate, Optional.empty(), toSubfields(keyType, requiredKeys));
List<Integer> keyIndices = ImmutableList.of(1, 3, 7, 11);
requiredKeys = keyIndices.stream().filter(k -> k < keys.size()).map(keys::get).collect(toList());
runTest(testOrcFileName, mapType, pruneMaps(expectedValues, requiredKeys), orcPredicate, Optional.empty(), toSubfields(keyType, requiredKeys));
}
}
// read only some rows
List<Integer> ids = IntStream.range(0, expectedValues.size()).map(i -> i % 10).boxed().collect(toImmutableList());
ImmutableList<Type> types = ImmutableList.of(mapType, INTEGER);
Map<Integer, Map<Subfield, TupleDomainFilter>> filters = ImmutableMap.of(1, ImmutableMap.of(new Subfield("c"), toBigintValues(new long[] { 1, 5, 6 }, true)));
assertFileContentsPresto(types, new File(getResource(testOrcFileName).getFile()), filterRows(types, ImmutableList.of(expectedValues, ids), filters), OrcEncoding.DWRF, OrcPredicate.TRUE, Optional.of(filters), ImmutableList.of(), ImmutableMap.of(), ImmutableMap.of());
TestingFilterFunction filterFunction = new TestingFilterFunction(mapType);
assertFileContentsPresto(types, new File(getResource(testOrcFileName).getFile()), filterFunction.filterRows(ImmutableList.of(expectedValues, ids)), OrcEncoding.DWRF, OrcPredicate.TRUE, Optional.empty(), ImmutableList.of(filterFunction), ImmutableMap.of(0, 0), ImmutableMap.of());
}
use of com.facebook.presto.common.Subfield in project presto by prestodb.
the class TestMapFlatSelectiveStreamReader method runTest.
private <K, V> void runTest(String testOrcFileName, Type mapType, List<Map<K, V>> expectedValues, OrcPredicate orcPredicate, Optional<TupleDomainFilter> filter, List<Subfield> subfields) throws Exception {
List<Type> types = ImmutableList.of(mapType);
Optional<Map<Integer, Map<Subfield, TupleDomainFilter>>> filters = filter.map(f -> ImmutableMap.of(new Subfield("c"), f)).map(f -> ImmutableMap.of(0, f));
assertFileContentsPresto(types, new File(getResource(testOrcFileName).getFile()), filters.map(f -> filterRows(types, ImmutableList.of(expectedValues), f)).orElse(ImmutableList.of(expectedValues)), OrcEncoding.DWRF, orcPredicate, filters, ImmutableList.of(), ImmutableMap.of(), ImmutableMap.of(0, subfields));
}
use of com.facebook.presto.common.Subfield in project presto by prestodb.
the class DeltaPageSourceProvider method createParquetPageSource.
private static ConnectorPageSource createParquetPageSource(HdfsEnvironment hdfsEnvironment, String user, Configuration configuration, Path path, long start, long length, long fileSize, List<DeltaColumnHandle> columns, SchemaTableName tableName, DataSize maxReadBlockSize, boolean batchReaderEnabled, boolean verificationEnabled, TypeManager typeManager, TupleDomain<DeltaColumnHandle> effectivePredicate, FileFormatDataSourceStats stats, boolean columnIndexFilterEnabled) {
AggregatedMemoryContext systemMemoryContext = newSimpleAggregatedMemoryContext();
ParquetDataSource dataSource = null;
try {
FSDataInputStream inputStream = hdfsEnvironment.getFileSystem(user, path, configuration).open(path);
dataSource = buildHdfsParquetDataSource(inputStream, path, stats);
ParquetMetadata parquetMetadata = MetadataReader.readFooter(dataSource, fileSize).getParquetMetadata();
FileMetaData fileMetaData = parquetMetadata.getFileMetaData();
MessageType fileSchema = fileMetaData.getSchema();
Optional<MessageType> message = columns.stream().filter(column -> column.getColumnType() == REGULAR || isPushedDownSubfield(column)).map(column -> getColumnType(typeManager.getType(column.getDataType()), fileSchema, column, tableName, path)).filter(Optional::isPresent).map(Optional::get).map(type -> new MessageType(fileSchema.getName(), type)).reduce(MessageType::union);
MessageType requestedSchema = message.orElse(new MessageType(fileSchema.getName(), ImmutableList.of()));
ImmutableList.Builder<BlockMetaData> footerBlocks = ImmutableList.builder();
for (BlockMetaData block : parquetMetadata.getBlocks()) {
long firstDataPage = block.getColumns().get(0).getFirstDataPageOffset();
if (firstDataPage >= start && firstDataPage < start + length) {
footerBlocks.add(block);
}
}
Map<List<String>, RichColumnDescriptor> descriptorsByPath = getDescriptors(fileSchema, requestedSchema);
TupleDomain<ColumnDescriptor> parquetTupleDomain = getParquetTupleDomain(descriptorsByPath, effectivePredicate);
Predicate parquetPredicate = buildPredicate(requestedSchema, parquetTupleDomain, descriptorsByPath);
final ParquetDataSource finalDataSource = dataSource;
ImmutableList.Builder<BlockMetaData> blocks = ImmutableList.builder();
List<ColumnIndexStore> blockIndexStores = new ArrayList<>();
for (BlockMetaData block : footerBlocks.build()) {
Optional<ColumnIndexStore> columnIndexStore = ColumnIndexFilterUtils.getColumnIndexStore(parquetPredicate, finalDataSource, block, descriptorsByPath, columnIndexFilterEnabled);
if (predicateMatches(parquetPredicate, block, finalDataSource, descriptorsByPath, parquetTupleDomain, columnIndexStore, columnIndexFilterEnabled)) {
blocks.add(block);
blockIndexStores.add(columnIndexStore.orElse(null));
}
}
MessageColumnIO messageColumnIO = getColumnIO(fileSchema, requestedSchema);
ParquetReader parquetReader = new ParquetReader(messageColumnIO, blocks.build(), dataSource, systemMemoryContext, maxReadBlockSize, batchReaderEnabled, verificationEnabled, parquetPredicate, blockIndexStores, columnIndexFilterEnabled);
ImmutableList.Builder<String> namesBuilder = ImmutableList.builder();
ImmutableList.Builder<Type> typesBuilder = ImmutableList.builder();
ImmutableList.Builder<Optional<Field>> fieldsBuilder = ImmutableList.builder();
for (DeltaColumnHandle column : columns) {
checkArgument(column.getColumnType() == REGULAR || column.getColumnType() == SUBFIELD, "column type must be regular or subfield column");
String name = column.getName();
Type type = typeManager.getType(column.getDataType());
namesBuilder.add(name);
typesBuilder.add(type);
if (isPushedDownSubfield(column)) {
Subfield pushedDownSubfield = getPushedDownSubfield(column);
List<String> nestedColumnPath = nestedColumnPath(pushedDownSubfield);
Optional<ColumnIO> columnIO = findNestedColumnIO(lookupColumnByName(messageColumnIO, pushedDownSubfield.getRootName()), nestedColumnPath);
if (columnIO.isPresent()) {
fieldsBuilder.add(constructField(type, columnIO.get()));
} else {
fieldsBuilder.add(Optional.empty());
}
} else if (getParquetType(type, fileSchema, column, tableName, path).isPresent()) {
fieldsBuilder.add(constructField(type, lookupColumnByName(messageColumnIO, name)));
} else {
fieldsBuilder.add(Optional.empty());
}
}
return new ParquetPageSource(parquetReader, typesBuilder.build(), fieldsBuilder.build(), namesBuilder.build(), new RuntimeStats());
} catch (Exception exception) {
try {
if (dataSource != null) {
dataSource.close();
}
} catch (IOException ignored) {
}
if (exception instanceof PrestoException) {
throw (PrestoException) exception;
}
if (exception instanceof ParquetCorruptionException) {
throw new PrestoException(DELTA_BAD_DATA, exception);
}
if (exception instanceof AccessControlException) {
throw new PrestoException(PERMISSION_DENIED, exception.getMessage(), exception);
}
if (nullToEmpty(exception.getMessage()).trim().equals("Filesystem closed") || exception instanceof FileNotFoundException) {
throw new PrestoException(DELTA_CANNOT_OPEN_SPLIT, exception);
}
String message = format("Error opening Hive split %s (offset=%s, length=%s): %s", path, start, length, exception.getMessage());
if (exception.getClass().getSimpleName().equals("BlockMissingException")) {
throw new PrestoException(DELTA_MISSING_DATA, message, exception);
}
throw new PrestoException(DELTA_CANNOT_OPEN_SPLIT, message, exception);
}
}
use of com.facebook.presto.common.Subfield in project presto by prestodb.
the class SubfieldExtractor method toRowExpression.
private RowExpression toRowExpression(Subfield subfield, List<Type> types) {
List<Subfield.PathElement> path = subfield.getPath();
if (path.isEmpty()) {
return new VariableReferenceExpression(Optional.empty(), subfield.getRootName(), types.get(0));
}
RowExpression base = toRowExpression(new Subfield(subfield.getRootName(), path.subList(0, path.size() - 1)), types.subList(0, types.size() - 1));
Type baseType = types.get(types.size() - 2);
Subfield.PathElement pathElement = path.get(path.size() - 1);
if (pathElement instanceof Subfield.LongSubscript) {
Type indexType = baseType instanceof MapType ? ((MapType) baseType).getKeyType() : BIGINT;
FunctionHandle functionHandle = functionResolution.subscriptFunction(baseType, indexType);
ConstantExpression index = new ConstantExpression(base.getSourceLocation(), ((Subfield.LongSubscript) pathElement).getIndex(), indexType);
return new CallExpression(base.getSourceLocation(), SUBSCRIPT.name(), functionHandle, types.get(types.size() - 1), ImmutableList.of(base, index));
}
if (pathElement instanceof Subfield.StringSubscript) {
Type indexType = ((MapType) baseType).getKeyType();
FunctionHandle functionHandle = functionResolution.subscriptFunction(baseType, indexType);
ConstantExpression index = new ConstantExpression(base.getSourceLocation(), Slices.utf8Slice(((Subfield.StringSubscript) pathElement).getIndex()), indexType);
return new CallExpression(base.getSourceLocation(), SUBSCRIPT.name(), functionHandle, types.get(types.size() - 1), ImmutableList.of(base, index));
}
if (pathElement instanceof Subfield.NestedField) {
Subfield.NestedField nestedField = (Subfield.NestedField) pathElement;
return new SpecialFormExpression(base.getSourceLocation(), DEREFERENCE, types.get(types.size() - 1), base, new ConstantExpression(base.getSourceLocation(), getFieldIndex((RowType) baseType, nestedField.getName()), INTEGER));
}
verify(false, "Unexpected path element: " + pathElement);
return null;
}
Aggregations