Search in sources:

Example 1 with Subfield

use of com.facebook.presto.common.Subfield in project presto by prestodb.

In the class StructSelectiveStreamReader, method getTopLevelFilter.

/**
 * Picks out the filter keyed by a subfield whose path is empty, i.e. the filter that the
 * error messages below call the "top-level" filter of the ROW column.
 *
 * @param filters filters keyed by subfield
 * @return the single top-level filter, or {@link Optional#empty()} when no entry has an empty path
 * @throws IllegalArgumentException if more than one top-level filter is present, or if the
 *         top-level filter is neither {@code IS_NULL} nor {@code IS_NOT_NULL}
 */
private static Optional<TupleDomainFilter> getTopLevelFilter(Map<Subfield, TupleDomainFilter> filters) {
    // Keep only the entries whose subfield path has no elements.
    Map<Subfield, TupleDomainFilter> rootFilters = Maps.filterEntries(filters, candidate -> candidate.getKey().getPath().isEmpty());
    if (!rootFilters.isEmpty()) {
        checkArgument(rootFilters.size() == 1, "ROW column may have at most one top-level range filter");
        TupleDomainFilter rootFilter = Iterables.getOnlyElement(rootFilters.values());
        // Identity comparison against the two shared filter constants is intentional.
        boolean isNullCheck = rootFilter == IS_NULL || rootFilter == IS_NOT_NULL;
        checkArgument(isNullCheck, "Top-level range filter on ROW column must be IS NULL or IS NOT NULL");
        return Optional.of(rootFilter);
    }
    return Optional.empty();
}
Also used : Subfield(com.facebook.presto.common.Subfield) TupleDomainFilter(com.facebook.presto.common.predicate.TupleDomainFilter)

Example 2 with Subfield

use of com.facebook.presto.common.Subfield in project presto by prestodb.

In the class StructSelectiveStreamReader, method checkMissingFieldFilters.

/**
 * Evaluates the filters whose leading field name is not among the given nested streams.
 *
 * <p>Filters with an empty subfield path, and filters whose first path element names a field
 * found in {@code nestedStreams}, are skipped. Every remaining filter is tested against null.
 *
 * @param nestedStreams stream descriptors whose field names (lower-cased with
 *        {@link Locale#ENGLISH}) form the set of present fields
 * @param filters filters keyed by subfield
 * @return {@code false} as soon as a filter on a missing field rejects null; {@code true} otherwise
 * @throws IllegalArgumentException if a filter on a missing field is not deterministic
 */
private boolean checkMissingFieldFilters(Collection<StreamDescriptor> nestedStreams, Map<Subfield, TupleDomainFilter> filters) {
    if (filters.isEmpty()) {
        return true;
    }

    Set<String> knownFields = nestedStreams.stream()
            .map(stream -> stream.getFieldName().toLowerCase(Locale.ENGLISH))
            .collect(toImmutableSet());

    for (Map.Entry<Subfield, TupleDomainFilter> candidate : filters.entrySet()) {
        Subfield target = candidate.getKey();
        if (!target.getPath().isEmpty()) {
            String leadingField = ((Subfield.NestedField) target.getPath().get(0)).getName();
            if (!knownFields.contains(leadingField)) {
                // The field is missing: if the filter accepts null every row passes,
                // otherwise no row passes.
                TupleDomainFilter missingFieldFilter = candidate.getValue();
                checkArgument(missingFieldFilter.isDeterministic(), "Non-deterministic range filters are not supported yet");
                boolean acceptsNull = missingFieldFilter.testNull();
                if (!acceptsNull) {
                    return false;
                }
            }
        }
    }
    return true;
}
Also used : DateTimeZone(org.joda.time.DateTimeZone) RunLengthEncodedBlock(com.facebook.presto.common.block.RunLengthEncodedBlock) BooleanInputStream(com.facebook.presto.orc.stream.BooleanInputStream) Preconditions.checkArgument(com.google.common.base.Preconditions.checkArgument) Locale(java.util.Locale) Map(java.util.Map) BlockLease(com.facebook.presto.common.block.BlockLease) ImmutableMap(com.google.common.collect.ImmutableMap) InputStreamSource(com.facebook.presto.orc.stream.InputStreamSource) Collection(java.util.Collection) Set(java.util.Set) SelectiveStreamReaders.initializeOutputPositions(com.facebook.presto.orc.reader.SelectiveStreamReaders.initializeOutputPositions) OrcRecordReaderOptions(com.facebook.presto.orc.OrcRecordReaderOptions) Preconditions.checkState(com.google.common.base.Preconditions.checkState) List(java.util.List) ImmutableMap.toImmutableMap(com.google.common.collect.ImmutableMap.toImmutableMap) ClassLayout(org.openjdk.jol.info.ClassLayout) SizeOf.sizeOf(io.airlift.slice.SizeOf.sizeOf) IS_NOT_NULL(com.facebook.presto.common.predicate.TupleDomainFilter.IS_NOT_NULL) InputStreamSources(com.facebook.presto.orc.stream.InputStreamSources) Optional(java.util.Optional) IS_NULL(com.facebook.presto.common.predicate.TupleDomainFilter.IS_NULL) MoreObjects.toStringHelper(com.google.common.base.MoreObjects.toStringHelper) StreamDescriptor(com.facebook.presto.orc.StreamDescriptor) Iterables(com.google.common.collect.Iterables) Field(com.facebook.presto.common.type.RowType.Field) RowBlock(com.facebook.presto.common.block.RowBlock) HashMap(java.util.HashMap) Function(java.util.function.Function) ArrayList(java.util.ArrayList) PRESENT(com.facebook.presto.orc.metadata.Stream.StreamKind.PRESENT) OrcLocalMemoryContext(com.facebook.presto.orc.OrcLocalMemoryContext) Subfield(com.facebook.presto.common.Subfield) ImmutableList(com.google.common.collect.ImmutableList) Verify.verify(com.google.common.base.Verify.verify) 
Objects.requireNonNull(java.util.Objects.requireNonNull) ImmutableSet.toImmutableSet(com.google.common.collect.ImmutableSet.toImmutableSet) Type(com.facebook.presto.common.type.Type) Stripe(com.facebook.presto.orc.Stripe) Nullable(javax.annotation.Nullable) MissingInputStreamSource.missingStreamSource(com.facebook.presto.orc.stream.MissingInputStreamSource.missingStreamSource) OrcAggregatedMemoryContext(com.facebook.presto.orc.OrcAggregatedMemoryContext) IOException(java.io.IOException) Maps(com.google.common.collect.Maps) ClosingBlockLease(com.facebook.presto.common.block.ClosingBlockLease) TupleDomainFilter(com.facebook.presto.common.predicate.TupleDomainFilter) Arrays.ensureCapacity(com.facebook.presto.common.array.Arrays.ensureCapacity) Block(com.facebook.presto.common.block.Block) RowType(com.facebook.presto.common.type.RowType) StreamDescriptor(com.facebook.presto.orc.StreamDescriptor) Map(java.util.Map) ImmutableMap(com.google.common.collect.ImmutableMap) ImmutableMap.toImmutableMap(com.google.common.collect.ImmutableMap.toImmutableMap) HashMap(java.util.HashMap) Subfield(com.facebook.presto.common.Subfield) TupleDomainFilter(com.facebook.presto.common.predicate.TupleDomainFilter)

Example 3 with Subfield

use of com.facebook.presto.common.Subfield in project presto by prestodb.

In the class OrcTester, method testRow.

/**
 * Checks whether one row satisfies every filter registered for every column.
 *
 * @param types column types, indexed by column
 * @param values column-major values; {@code values.get(column).get(row)} is the cell tested
 * @param row index of the row to test
 * @param columnFilters per-column subfield filters; columns without an entry are not filtered
 * @return {@code true} if no filter rejects the row, {@code false} on the first rejection
 */
private static boolean testRow(List<Type> types, List<List<?>> values, int row, Map<Integer, Map<Subfield, TupleDomainFilter>> columnFilters) {
    int columnIndex = 0;
    while (columnIndex < types.size()) {
        Map<Subfield, TupleDomainFilter> filtersForColumn = columnFilters.get(columnIndex);
        if (filtersForColumn != null) {
            Type columnType = types.get(columnIndex);
            Object cell = values.get(columnIndex).get(row);
            for (Map.Entry<Subfield, TupleDomainFilter> subfieldFilter : filtersForColumn.entrySet()) {
                boolean accepted = testSubfieldValue(columnType, cell, subfieldFilter.getKey(), subfieldFilter.getValue());
                if (!accepted) {
                    return false;
                }
            }
        }
        columnIndex++;
    }
    return true;
}
Also used : DecimalType(com.facebook.presto.common.type.DecimalType) ArrayType(com.facebook.presto.common.type.ArrayType) CharType(com.facebook.presto.common.type.CharType) RowType(com.facebook.presto.common.type.RowType) VarcharType(com.facebook.presto.common.type.VarcharType) VarbinaryType(com.facebook.presto.common.type.VarbinaryType) MapType(com.facebook.presto.common.type.MapType) Type(com.facebook.presto.common.type.Type) OrcLazyObject(com.facebook.hive.orc.lazy.OrcLazyObject) Map(java.util.Map) ImmutableMap.toImmutableMap(com.google.common.collect.ImmutableMap.toImmutableMap) ImmutableMap(com.google.common.collect.ImmutableMap) HashMap(java.util.HashMap) Subfield(com.facebook.presto.common.Subfield) TupleDomainFilter(com.facebook.presto.common.predicate.TupleDomainFilter)

Example 4 with Subfield

use of com.facebook.presto.common.Subfield in project presto by prestodb.

In the class OrcTester, method assertFileContentsPresto.

/**
 * Asserts that the file held by {@code tempFile} reads back as {@code expectedValues}.
 *
 * <p>When {@code useSelectiveOrcReader} is true, the check is delegated to other
 * {@code assertFileContentsPresto} overloads: once without filters, then once per entry of
 * {@code settings} with that entry's column filters and required subfields applied to the
 * expected values. The method then returns; {@code skipFirstBatch} and {@code skipStripe}
 * are not consulted on this path.
 *
 * <p>Otherwise the file is read batch by batch with an {@code OrcBatchRecordReader}, comparing
 * each column block against {@code expectedValues} and asserting the reader and file positions
 * after every batch. {@code settings} is not consulted on this path.
 *
 * @param types column types, indexed by column
 * @param tempFile the file to read
 * @param expectedValues column-major expected values
 * @param skipFirstBatch batch path only: skip reading blocks for one batch (see loop body)
 * @param skipStripe batch path only: skip reading blocks while fewer than 10000 rows were processed
 * @param orcEncoding encoding passed through to the reader factories
 * @param format used only to build the ORC predicate
 * @param isHiveWriter used only to build the ORC predicate
 * @param useSelectiveOrcReader selects between the two verification paths described above
 * @param settings selective path only: one verification pass is run per entry
 * @param intermediateEncryptionKeys passed through to the reader factories
 * @throws IOException declared for the reader calls; not thrown directly by this method
 */
private static void assertFileContentsPresto(List<Type> types, TempFile tempFile, List<List<?>> expectedValues, boolean skipFirstBatch, boolean skipStripe, OrcEncoding orcEncoding, Format format, boolean isHiveWriter, boolean useSelectiveOrcReader, List<OrcReaderSettings> settings, Map<Integer, Slice> intermediateEncryptionKeys) throws IOException {
    OrcPredicate orcPredicate = createOrcPredicate(types, expectedValues, format, isHiveWriter);
    // Every column index 0..types.size()-1 is both included (index -> type) and output.
    Map<Integer, Type> includedColumns = IntStream.range(0, types.size()).boxed().collect(toImmutableMap(Function.identity(), types::get));
    List<Integer> outputColumns = IntStream.range(0, types.size()).boxed().collect(toImmutableList());
    if (useSelectiveOrcReader) {
        // Baseline pass: empty Optional / list / maps where the per-settings call below passes
        // column filters, filter functions, input mapping and required subfields.
        assertFileContentsPresto(types, tempFile.getFile(), expectedValues, orcEncoding, orcPredicate, Optional.empty(), ImmutableList.of(), ImmutableMap.of(), ImmutableMap.of(), intermediateEncryptionKeys, includedColumns, outputColumns);
        for (OrcReaderSettings entry : settings) {
            assertTrue(entry.getFilterFunctions().isEmpty(), "Filter functions are not supported yet");
            assertTrue(entry.getFilterFunctionInputMapping().isEmpty(), "Filter functions are not supported yet");
            Map<Integer, Map<Subfield, TupleDomainFilter>> columnFilters = entry.getColumnFilters();
            // Expected output for this entry: filter the rows first, then prune to the required subfields.
            List<List<?>> prunedAndFilteredRows = pruneValues(types, filterRows(types, expectedValues, columnFilters), entry.getRequiredSubfields());
            // An order checker is only created when the entry declares an expected filter order.
            Optional<TupleDomainFilterOrderChecker> orderChecker = Optional.empty();
            List<Integer> expectedFilterOrder = entry.getExpectedFilterOrder();
            if (!expectedFilterOrder.isEmpty()) {
                orderChecker = Optional.of(new TupleDomainFilterOrderChecker(expectedFilterOrder));
            }
            // With a checker, the filters are passed through addOrderTracking; otherwise they are used as-is.
            Optional<Map<Integer, Map<Subfield, TupleDomainFilter>>> transformedFilters = Optional.of(orderChecker.map(checker -> addOrderTracking(columnFilters, checker)).orElse(columnFilters));
            // NOTE(review): this overload takes no encryption keys or column lists, unlike the
            // baseline call above - confirm that is intended.
            assertFileContentsPresto(types, tempFile.getFile(), prunedAndFilteredRows, orcEncoding, orcPredicate, transformedFilters, entry.getFilterFunctions(), entry.getFilterFunctionInputMapping(), entry.getRequiredSubfields());
            orderChecker.ifPresent(TupleDomainFilterOrderChecker::assertOrder);
        }
        return;
    }
    // Batch reader path; the reader is closed by try-with-resources.
    try (OrcBatchRecordReader recordReader = createCustomOrcRecordReader(tempFile, orcEncoding, orcPredicate, types, MAX_BATCH_SIZE, new StorageOrcFileTailSource(), new StorageStripeMetadataSource(), false, intermediateEncryptionKeys, false)) {
        assertEquals(recordReader.getReaderPosition(), 0);
        assertEquals(recordReader.getFilePosition(), 0);
        boolean isFirst = true;
        int rowsProcessed = 0;
        // Loop ends on the first negative batch size (presumably the end-of-data signal - confirm
        // against OrcBatchRecordReader.nextBatch).
        for (int batchSize = toIntExact(recordReader.nextBatch()); batchSize >= 0; batchSize = toIntExact(recordReader.nextBatch())) {
            if (skipStripe && rowsProcessed < 10000) {
            // skip recordReader.readBlock
            // NOTE(review): 10000 presumably matches the stripe size used when writing - confirm.
            } else if (skipFirstBatch && isFirst) {
                // skip recordReader.readBlock
                // isFirst is cleared only here, so with skipStripe set this skips the first batch
                // reached after the 10000-row threshold rather than the first batch of the file.
                isFirst = false;
            } else {
                for (int i = 0; i < types.size(); i++) {
                    Type type = types.get(i);
                    Block block = recordReader.readBlock(i);
                    assertEquals(block.getPositionCount(), batchSize);
                    checkNullValues(type, block);
                    // Compare against the expected column values starting at offset rowsProcessed.
                    assertBlockEquals(type, block, expectedValues.get(i), rowsProcessed);
                }
            }
            // Positions are asserted before this batch is added to rowsProcessed.
            assertEquals(recordReader.getReaderPosition(), rowsProcessed);
            assertEquals(recordReader.getFilePosition(), rowsProcessed);
            rowsProcessed += batchSize;
        }
        // The total row count is taken from the first expected column.
        assertEquals(rowsProcessed, expectedValues.get(0).size());
        assertEquals(recordReader.getReaderPosition(), rowsProcessed);
        assertEquals(recordReader.getFilePosition(), rowsProcessed);
    }
}
Also used : StorageOrcFileTailSource(com.facebook.presto.orc.cache.StorageOrcFileTailSource) TupleDomainFilter(com.facebook.presto.common.predicate.TupleDomainFilter) BigInteger(java.math.BigInteger) DecimalType(com.facebook.presto.common.type.DecimalType) ArrayType(com.facebook.presto.common.type.ArrayType) CharType(com.facebook.presto.common.type.CharType) RowType(com.facebook.presto.common.type.RowType) VarcharType(com.facebook.presto.common.type.VarcharType) VarbinaryType(com.facebook.presto.common.type.VarbinaryType) MapType(com.facebook.presto.common.type.MapType) Type(com.facebook.presto.common.type.Type) Block(com.facebook.presto.common.block.Block) Arrays.asList(java.util.Arrays.asList) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) Lists.newArrayList(com.google.common.collect.Lists.newArrayList) ArrayList(java.util.ArrayList) List(java.util.List) ImmutableList(com.google.common.collect.ImmutableList) Collectors.toList(java.util.stream.Collectors.toList) Map(java.util.Map) ImmutableMap.toImmutableMap(com.google.common.collect.ImmutableMap.toImmutableMap) ImmutableMap(com.google.common.collect.ImmutableMap) HashMap(java.util.HashMap) TestingOrcPredicate.createOrcPredicate(com.facebook.presto.orc.TestingOrcPredicate.createOrcPredicate) Subfield(com.facebook.presto.common.Subfield)

Example 5 with Subfield

use of com.facebook.presto.common.Subfield in project presto by prestodb.

In the class TestSelectiveOrcReader, method testArraysWithSubfieldPruning.

/**
 * Round-trips array columns through the tester with required-subfield settings of the form
 * {@code c[i]} / {@code c[i][j]}, covering flat integer arrays (with and without a range
 * filter on one element), arrays of arrays, and arrays of maps.
 */
@Test
public void testArraysWithSubfieldPruning() throws Exception {
    // Flat arrays with constant content.
    List<OrcReaderSettings> constantArraySettings = ImmutableList.of(
            OrcReaderSettings.builder().addRequiredSubfields(0, "c[1]").build(),
            OrcReaderSettings.builder().addRequiredSubfields(0, "c[1]", "c[2]").build(),
            OrcReaderSettings.builder().addRequiredSubfields(0, "c[2]").build());
    tester.assertRoundTripWithSettings(
            arrayType(INTEGER),
            createList(NUM_ROWS, row -> ImmutableList.of(1, 2, 3, 4)),
            constantArraySettings);

    // Fixed seed so the generated data is reproducible.
    Random random = new Random(0);

    // Flat arrays whose first two elements are random, combined with a range filter on one element.
    OrcReaderSettings filterOnFirstElement = OrcReaderSettings.builder()
            .addRequiredSubfields(0, "c[1]", "c[3]")
            .setColumnFilters(ImmutableMap.of(0, ImmutableMap.of(new Subfield("c[1]"), BigintRange.of(0, 4, false))))
            .build();
    OrcReaderSettings filterOnSecondElement = OrcReaderSettings.builder()
            .addRequiredSubfields(0, "c[2]", "c[3]")
            .setColumnFilters(ImmutableMap.of(0, ImmutableMap.of(new Subfield("c[2]"), BigintRange.of(0, 4, false))))
            .build();
    tester.assertRoundTripWithSettings(
            arrayType(INTEGER),
            createList(NUM_ROWS, row -> ImmutableList.of(random.nextInt(10), random.nextInt(10), 3, 4)),
            ImmutableList.of(filterOnFirstElement, filterOnSecondElement));

    // arrays of arrays
    List<OrcReaderSettings> nestedArraySettings = ImmutableList.of(
            OrcReaderSettings.builder().addRequiredSubfields(0, "c[1][1]").build(),
            OrcReaderSettings.builder().addRequiredSubfields(0, "c[2][2]", "c[4][2]", "c[5][3]").build(),
            OrcReaderSettings.builder().addRequiredSubfields(0, "c[2][3]", "c[10][2]", "c[3][10]").build());
    tester.assertRoundTripWithSettings(
            arrayType(arrayType(INTEGER)),
            createList(NUM_ROWS, row -> nCopies(1 + random.nextInt(5), ImmutableList.of(1, 2, 3))),
            nestedArraySettings);

    // arrays of maps
    List<OrcReaderSettings> arrayOfMapSettings = ImmutableList.of(
            OrcReaderSettings.builder().addRequiredSubfields(0, "c[1][1]").build(),
            OrcReaderSettings.builder().addRequiredSubfields(0, "c[2][1]").build(),
            OrcReaderSettings.builder().addRequiredSubfields(0, "c[2][1]", "c[4][1]", "c[3][2]").build());
    tester.assertRoundTripWithSettings(
            arrayType(mapType(INTEGER, INTEGER)),
            createList(NUM_ROWS, row -> nCopies(5, ImmutableMap.of(1, 10, 2, 20))),
            arrayOfMapSettings);
}
Also used : CharType.createCharType(com.facebook.presto.common.type.CharType.createCharType) Page(com.facebook.presto.common.Page) DateTimeZone(org.joda.time.DateTimeZone) Arrays(java.util.Arrays) OrcTester.createCustomOrcSelectiveRecordReader(com.facebook.presto.orc.OrcTester.createCustomOrcSelectiveRecordReader) BigintRange(com.facebook.presto.common.predicate.TupleDomainFilter.BigintRange) Test(org.testng.annotations.Test) Random(java.util.Random) OrcTester.quickSelectiveOrcTester(com.facebook.presto.orc.OrcTester.quickSelectiveOrcTester) SESSION(com.facebook.presto.testing.TestingConnectorSession.SESSION) Iterables.concat(com.google.common.collect.Iterables.concat) Iterables.cycle(com.google.common.collect.Iterables.cycle) Slices(io.airlift.slice.Slices) Map(java.util.Map) HIVE_STORAGE_TIME_ZONE(com.facebook.presto.orc.OrcTester.HIVE_STORAGE_TIME_ZONE) FloatRange(com.facebook.presto.common.predicate.TupleDomainFilter.FloatRange) BigInteger(java.math.BigInteger) SqlDecimal(com.facebook.presto.common.type.SqlDecimal) BigintValuesUsingHashTable(com.facebook.presto.common.predicate.TupleDomainFilter.BigintValuesUsingHashTable) ImmutableMap(com.google.common.collect.ImmutableMap) DOUBLE(com.facebook.presto.common.type.DoubleType.DOUBLE) OrcTester.mapType(com.facebook.presto.orc.OrcTester.mapType) NONE(com.facebook.presto.orc.metadata.CompressionKind.NONE) Collections.nCopies(java.util.Collections.nCopies) BeforeClass(org.testng.annotations.BeforeClass) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) Range(com.google.common.collect.Range) BooleanValue(com.facebook.presto.common.predicate.TupleDomainFilter.BooleanValue) Iterables.limit(com.google.common.collect.Iterables.limit) Assert.assertNotNull(org.testng.Assert.assertNotNull) Streams(com.google.common.collect.Streams) Assertions.assertBetweenInclusive(com.facebook.airlift.testing.Assertions.assertBetweenInclusive) List(java.util.List) 
ImmutableMap.toImmutableMap(com.google.common.collect.ImmutableMap.toImmutableMap) Lists.newArrayList(com.google.common.collect.Lists.newArrayList) SqlTimestamp(com.facebook.presto.common.type.SqlTimestamp) IS_NOT_NULL(com.facebook.presto.common.predicate.TupleDomainFilter.IS_NOT_NULL) INTEGER(com.facebook.presto.common.type.IntegerType.INTEGER) CompressionKind(com.facebook.presto.orc.metadata.CompressionKind) Optional(java.util.Optional) IS_NULL(com.facebook.presto.common.predicate.TupleDomainFilter.IS_NULL) IntStream(java.util.stream.IntStream) MAX_BLOCK_SIZE(com.facebook.presto.orc.OrcTester.MAX_BLOCK_SIZE) DecimalType(com.facebook.presto.common.type.DecimalType) ContiguousSet(com.google.common.collect.ContiguousSet) Slice(io.airlift.slice.Slice) Assert.assertNull(org.testng.Assert.assertNull) TINYINT(com.facebook.presto.common.type.TinyintType.TINYINT) VARCHAR(com.facebook.presto.common.type.VarcharType.VARCHAR) DateTimeTestingUtils.sqlTimestampOf(com.facebook.presto.testing.DateTimeTestingUtils.sqlTimestampOf) Assert.assertEquals(org.testng.Assert.assertEquals) TIMESTAMP(com.facebook.presto.common.type.TimestampType.TIMESTAMP) Function(java.util.function.Function) DATE(com.facebook.presto.common.type.DateType.DATE) REAL(com.facebook.presto.common.type.RealType.REAL) BytesRange(com.facebook.presto.common.predicate.TupleDomainFilter.BytesRange) ArrayList(java.util.ArrayList) Strings(com.google.common.base.Strings) ZLIB(com.facebook.presto.orc.metadata.CompressionKind.ZLIB) SqlDate(com.facebook.presto.common.type.SqlDate) Subfield(com.facebook.presto.common.Subfield) ImmutableList(com.google.common.collect.ImmutableList) SqlVarbinary(com.facebook.presto.common.type.SqlVarbinary) DiscreteDomain(com.google.common.collect.DiscreteDomain) OrcTester.writeOrcColumnsPresto(com.facebook.presto.orc.OrcTester.writeOrcColumnsPresto) BOOLEAN(com.facebook.presto.common.type.BooleanType.BOOLEAN) CharType(com.facebook.presto.common.type.CharType) 
Type(com.facebook.presto.common.type.Type) MAX_BATCH_SIZE(com.facebook.presto.orc.OrcReader.MAX_BATCH_SIZE) BIGINT(com.facebook.presto.common.type.BigintType.BIGINT) OrcTester.arrayType(com.facebook.presto.orc.OrcTester.arrayType) InvalidFunctionArgumentException(com.facebook.presto.common.InvalidFunctionArgumentException) Iterator(java.util.Iterator) UTF_8(java.nio.charset.StandardCharsets.UTF_8) Assert.fail(org.testng.Assert.fail) AbstractIterator(com.google.common.collect.AbstractIterator) TupleDomainFilterUtils.toBigintValues(com.facebook.presto.common.predicate.TupleDomainFilterUtils.toBigintValues) VARBINARY(com.facebook.presto.common.type.VarbinaryType.VARBINARY) Maps(com.google.common.collect.Maps) Ints(com.google.common.primitives.Ints) TupleDomainFilter(com.facebook.presto.common.predicate.TupleDomainFilter) DWRF(com.facebook.presto.orc.OrcTester.Format.DWRF) OrcReaderSettings(com.facebook.presto.orc.OrcTester.OrcReaderSettings) Collectors.toList(java.util.stream.Collectors.toList) SMALLINT(com.facebook.presto.common.type.SmallintType.SMALLINT) OrcTester.rowType(com.facebook.presto.orc.OrcTester.rowType) TestingOrcPredicate.createOrcPredicate(com.facebook.presto.orc.TestingOrcPredicate.createOrcPredicate) Assert.assertTrue(org.testng.Assert.assertTrue) Block(com.facebook.presto.common.block.Block) BytesValues(com.facebook.presto.common.predicate.TupleDomainFilter.BytesValues) DoubleRange(com.facebook.presto.common.predicate.TupleDomainFilter.DoubleRange) Collections(java.util.Collections) ZSTD(com.facebook.presto.orc.metadata.CompressionKind.ZSTD) Random(java.util.Random) Subfield(com.facebook.presto.common.Subfield) Test(org.testng.annotations.Test)

Aggregations

Subfield (com.facebook.presto.common.Subfield) 54, ImmutableMap (com.google.common.collect.ImmutableMap) 27, Map (java.util.Map) 27, ImmutableList (com.google.common.collect.ImmutableList) 25, List (java.util.List) 24, TupleDomainFilter (com.facebook.presto.common.predicate.TupleDomainFilter) 22, Type (com.facebook.presto.common.type.Type) 21, ArrayList (java.util.ArrayList) 21, Optional (java.util.Optional) 20, Test (org.testng.annotations.Test) 19, ImmutableList.toImmutableList (com.google.common.collect.ImmutableList.toImmutableList) 18, ImmutableMap.toImmutableMap (com.google.common.collect.ImmutableMap.toImmutableMap) 18, Collectors.toList (java.util.stream.Collectors.toList) 12, TupleDomain (com.facebook.presto.common.predicate.TupleDomain) 11, ColumnHandle (com.facebook.presto.spi.ColumnHandle) 11, String.format (java.lang.String.format) 11, Set (java.util.Set) 11, Domain (com.facebook.presto.common.predicate.Domain) 10, CharType (com.facebook.presto.common.type.CharType) 10, DecimalType (com.facebook.presto.common.type.DecimalType) 10