use of com.facebook.presto.common.Subfield in project presto by prestodb.
the class StructSelectiveStreamReader method getTopLevelFilter.
/**
 * Extracts the filter that applies to the ROW column itself, i.e. the one whose
 * subfield path is empty.
 *
 * @param filters filters keyed by the subfield they apply to
 * @return the single top-level filter, or empty when no entry has an empty path
 * @throws IllegalArgumentException if more than one top-level filter is present, or
 *         if the filter is neither {@code IS_NULL} nor {@code IS_NOT_NULL}
 */
private static Optional<TupleDomainFilter> getTopLevelFilter(Map<Subfield, TupleDomainFilter> filters) {
    // Only the key matters here: a subfield with no path elements denotes the column itself.
    Map<Subfield, TupleDomainFilter> columnLevel = Maps.filterKeys(filters, subfield -> subfield.getPath().isEmpty());
    if (columnLevel.isEmpty()) {
        return Optional.empty();
    }

    checkArgument(columnLevel.size() == 1, "ROW column may have at most one top-level range filter");

    // Exactly one entry is guaranteed by the size check above.
    TupleDomainFilter candidate = columnLevel.values().iterator().next();
    boolean isNullCheck = candidate == IS_NULL || candidate == IS_NOT_NULL;
    checkArgument(isNullCheck, "Top-level range filter on ROW column must be IS NULL or IS NOT NULL");
    return Optional.of(candidate);
}
use of com.facebook.presto.common.Subfield in project presto by prestodb.
the class StructSelectiveStreamReader method checkMissingFieldFilters.
/**
 * Evaluates filters that reference struct fields which are absent from the file.
 *
 * <p>A field with no matching nested stream is treated as null for every row, so a
 * filter on such a field either passes all rows (it accepts null) or rejects all rows.
 * Filters with an empty subfield path (filters on the struct itself) are ignored here.
 *
 * <p>Field names are compared case-insensitively: both the nested stream names and the
 * subfield name are lower-cased with {@link Locale#ENGLISH} before the lookup.
 *
 * @param nestedStreams descriptors of the fields actually present
 * @param filters filters keyed by the subfield they apply to
 * @return {@code false} if some filter on a missing field rejects null (no row can pass);
 *         {@code true} otherwise
 * @throws IllegalArgumentException if a filter on a missing field is non-deterministic
 */
private boolean checkMissingFieldFilters(Collection<StreamDescriptor> nestedStreams, Map<Subfield, TupleDomainFilter> filters) {
    if (filters.isEmpty()) {
        return true;
    }

    Set<String> presentFieldNames = nestedStreams.stream()
            .map(StreamDescriptor::getFieldName)
            .map(name -> name.toLowerCase(Locale.ENGLISH))
            .collect(toImmutableSet());

    for (Map.Entry<Subfield, TupleDomainFilter> entry : filters.entrySet()) {
        Subfield subfield = entry.getKey();
        if (subfield.getPath().isEmpty()) {
            // Top-level filter on the struct itself; not a field filter.
            continue;
        }

        // Normalize the same way as presentFieldNames; otherwise a mixed-case subfield
        // name would never match and the field would wrongly be treated as missing.
        String fieldName = ((Subfield.NestedField) subfield.getPath().get(0)).getName().toLowerCase(Locale.ENGLISH);
        if (presentFieldNames.contains(fieldName)) {
            continue;
        }

        // Check out the filter. If filter allows nulls, then all rows pass, otherwise, no row passes.
        TupleDomainFilter filter = entry.getValue();
        checkArgument(filter.isDeterministic(), "Non-deterministic range filters are not supported yet");
        if (!filter.testNull()) {
            return false;
        }
    }
    return true;
}
use of com.facebook.presto.common.Subfield in project presto by prestodb.
the class OrcTester method testRow.
/**
 * Decides whether a single row satisfies every column filter.
 *
 * @param types column types, indexed by column
 * @param values column-major values: {@code values.get(column).get(row)}
 * @param row index of the row to test
 * @param columnFilters per-column subfield filters; columns without an entry are unfiltered
 * @return {@code true} if {@code testSubfieldValue} accepts the row's value for every
 *         filter of every column, {@code false} as soon as one filter rejects it
 */
private static boolean testRow(List<Type> types, List<List<?>> values, int row, Map<Integer, Map<Subfield, TupleDomainFilter>> columnFilters) {
    int columnCount = types.size();
    for (int columnIndex = 0; columnIndex < columnCount; columnIndex++) {
        Map<Subfield, TupleDomainFilter> subfieldFilters = columnFilters.get(columnIndex);
        if (subfieldFilters != null) {
            Type columnType = types.get(columnIndex);
            Object cell = values.get(columnIndex).get(row);
            // allMatch stops at the first rejecting filter, like an early return would.
            boolean accepted = subfieldFilters.entrySet().stream()
                    .allMatch(filterEntry -> testSubfieldValue(columnType, cell, filterEntry.getKey(), filterEntry.getValue()));
            if (!accepted) {
                return false;
            }
        }
    }
    return true;
}
use of com.facebook.presto.common.Subfield in project presto by prestodb.
the class OrcTester method assertFileContentsPresto.
/**
 * Reads the ORC file back and asserts that its contents match {@code expectedValues}.
 *
 * <p>With {@code useSelectiveOrcReader} set, the file is first verified with no filters,
 * and then once per entry in {@code settings}: the expected rows are filtered and pruned
 * according to that entry, and the filter evaluation order is checked when the entry
 * specifies an expected order. Filter functions must be empty in every entry.
 *
 * <p>Otherwise the file is read batch by batch with the batch reader. Blocks are read and
 * compared unless the batch is skipped ({@code skipStripe} while fewer than 10000 rows
 * have been processed, or the first non-stripe-skipped batch when {@code skipFirstBatch}
 * is set). Reader and file positions are asserted after every batch and at the end.
 *
 * @throws IOException if reading the file fails
 */
private static void assertFileContentsPresto(
        List<Type> types,
        TempFile tempFile,
        List<List<?>> expectedValues,
        boolean skipFirstBatch,
        boolean skipStripe,
        OrcEncoding orcEncoding,
        Format format,
        boolean isHiveWriter,
        boolean useSelectiveOrcReader,
        List<OrcReaderSettings> settings,
        Map<Integer, Slice> intermediateEncryptionKeys) throws IOException {
    OrcPredicate orcPredicate = createOrcPredicate(types, expectedValues, format, isHiveWriter);
    Map<Integer, Type> includedColumns = IntStream.range(0, types.size())
            .boxed()
            .collect(toImmutableMap(Function.identity(), types::get));
    List<Integer> outputColumns = IntStream.range(0, types.size())
            .boxed()
            .collect(toImmutableList());

    if (useSelectiveOrcReader) {
        // Baseline pass: no filters, no filter functions, no subfield pruning.
        assertFileContentsPresto(types, tempFile.getFile(), expectedValues, orcEncoding, orcPredicate, Optional.empty(), ImmutableList.of(), ImmutableMap.of(), ImmutableMap.of(), intermediateEncryptionKeys, includedColumns, outputColumns);

        for (OrcReaderSettings readerSettings : settings) {
            assertTrue(readerSettings.getFilterFunctions().isEmpty(), "Filter functions are not supported yet");
            assertTrue(readerSettings.getFilterFunctionInputMapping().isEmpty(), "Filter functions are not supported yet");

            Map<Integer, Map<Subfield, TupleDomainFilter>> columnFilters = readerSettings.getColumnFilters();
            List<List<?>> prunedAndFilteredRows = pruneValues(types, filterRows(types, expectedValues, columnFilters), readerSettings.getRequiredSubfields());

            // An order checker is only installed when the settings declare an expected order.
            List<Integer> expectedFilterOrder = readerSettings.getExpectedFilterOrder();
            Optional<TupleDomainFilterOrderChecker> orderChecker = expectedFilterOrder.isEmpty()
                    ? Optional.empty()
                    : Optional.of(new TupleDomainFilterOrderChecker(expectedFilterOrder));

            Map<Integer, Map<Subfield, TupleDomainFilter>> effectiveFilters = orderChecker
                    .map(checker -> addOrderTracking(columnFilters, checker))
                    .orElse(columnFilters);
            Optional<Map<Integer, Map<Subfield, TupleDomainFilter>>> transformedFilters = Optional.of(effectiveFilters);

            assertFileContentsPresto(types, tempFile.getFile(), prunedAndFilteredRows, orcEncoding, orcPredicate, transformedFilters, readerSettings.getFilterFunctions(), readerSettings.getFilterFunctionInputMapping(), readerSettings.getRequiredSubfields());
            orderChecker.ifPresent(TupleDomainFilterOrderChecker::assertOrder);
        }
        return;
    }

    try (OrcBatchRecordReader recordReader = createCustomOrcRecordReader(tempFile, orcEncoding, orcPredicate, types, MAX_BATCH_SIZE, new StorageOrcFileTailSource(), new StorageStripeMetadataSource(), false, intermediateEncryptionKeys, false)) {
        assertEquals(recordReader.getReaderPosition(), 0);
        assertEquals(recordReader.getFilePosition(), 0);

        boolean firstBatchPending = true;
        int rowsSeen = 0;
        int batchSize = toIntExact(recordReader.nextBatch());
        while (batchSize >= 0) {
            boolean skipForStripe = skipStripe && rowsSeen < 10000;
            if (!skipForStripe) {
                if (skipFirstBatch && firstBatchPending) {
                    // Skip recordReader.readBlock for this batch only.
                    firstBatchPending = false;
                }
                else {
                    for (int column = 0; column < types.size(); column++) {
                        Type columnType = types.get(column);
                        Block block = recordReader.readBlock(column);
                        assertEquals(block.getPositionCount(), batchSize);
                        checkNullValues(columnType, block);
                        assertBlockEquals(columnType, block, expectedValues.get(column), rowsSeen);
                    }
                }
            }
            // When skipForStripe holds, recordReader.readBlock is not called at all.

            assertEquals(recordReader.getReaderPosition(), rowsSeen);
            assertEquals(recordReader.getFilePosition(), rowsSeen);
            rowsSeen += batchSize;
            batchSize = toIntExact(recordReader.nextBatch());
        }

        assertEquals(rowsSeen, expectedValues.get(0).size());
        assertEquals(recordReader.getReaderPosition(), rowsSeen);
        assertEquals(recordReader.getFilePosition(), rowsSeen);
    }
}
use of com.facebook.presto.common.Subfield in project presto by prestodb.
the class TestSelectiveOrcReader method testArraysWithSubfieldPruning.
/**
 * Round-trips array columns while requesting only specific element subscripts,
 * optionally combined with a range filter on one of the elements. Covers flat
 * arrays, arrays of arrays and arrays of maps.
 */
@Test
public void testArraysWithSubfieldPruning() throws Exception {
    // Flat arrays, pruning only.
    OrcReaderSettings firstElement = OrcReaderSettings.builder().addRequiredSubfields(0, "c[1]").build();
    OrcReaderSettings firstTwoElements = OrcReaderSettings.builder().addRequiredSubfields(0, "c[1]", "c[2]").build();
    OrcReaderSettings secondElement = OrcReaderSettings.builder().addRequiredSubfields(0, "c[2]").build();
    tester.assertRoundTripWithSettings(
            arrayType(INTEGER),
            createList(NUM_ROWS, i -> ImmutableList.of(1, 2, 3, 4)),
            ImmutableList.of(firstElement, firstTwoElements, secondElement));

    // Flat arrays, pruning combined with a range filter on one element.
    OrcReaderSettings filterOnFirst = OrcReaderSettings.builder()
            .addRequiredSubfields(0, "c[1]", "c[3]")
            .setColumnFilters(ImmutableMap.of(0, ImmutableMap.of(new Subfield("c[1]"), BigintRange.of(0, 4, false))))
            .build();
    OrcReaderSettings filterOnSecond = OrcReaderSettings.builder()
            .addRequiredSubfields(0, "c[2]", "c[3]")
            .setColumnFilters(ImmutableMap.of(0, ImmutableMap.of(new Subfield("c[2]"), BigintRange.of(0, 4, false))))
            .build();
    Random random = new Random(0);
    tester.assertRoundTripWithSettings(
            arrayType(INTEGER),
            createList(NUM_ROWS, i -> ImmutableList.of(random.nextInt(10), random.nextInt(10), 3, 4)),
            ImmutableList.of(filterOnFirst, filterOnSecond));

    // arrays of arrays
    OrcReaderSettings nestedSingle = OrcReaderSettings.builder().addRequiredSubfields(0, "c[1][1]").build();
    OrcReaderSettings nestedSeveral = OrcReaderSettings.builder().addRequiredSubfields(0, "c[2][2]", "c[4][2]", "c[5][3]").build();
    OrcReaderSettings nestedLargeSubscripts = OrcReaderSettings.builder().addRequiredSubfields(0, "c[2][3]", "c[10][2]", "c[3][10]").build();
    tester.assertRoundTripWithSettings(
            arrayType(arrayType(INTEGER)),
            createList(NUM_ROWS, i -> nCopies(1 + random.nextInt(5), ImmutableList.of(1, 2, 3))),
            ImmutableList.of(nestedSingle, nestedSeveral, nestedLargeSubscripts));

    // arrays of maps
    OrcReaderSettings mapInFirst = OrcReaderSettings.builder().addRequiredSubfields(0, "c[1][1]").build();
    OrcReaderSettings mapInSecond = OrcReaderSettings.builder().addRequiredSubfields(0, "c[2][1]").build();
    OrcReaderSettings mapInSeveral = OrcReaderSettings.builder().addRequiredSubfields(0, "c[2][1]", "c[4][1]", "c[3][2]").build();
    tester.assertRoundTripWithSettings(
            arrayType(mapType(INTEGER, INTEGER)),
            createList(NUM_ROWS, i -> nCopies(5, ImmutableMap.of(1, 10, 2, 20))),
            ImmutableList.of(mapInFirst, mapInSecond, mapInSeveral));
}
Aggregations