Search in sources :

Example 1 with StreamDescriptor

use of com.facebook.presto.orc.StreamDescriptor in project presto by prestodb.

In class StructSelectiveStreamReader, method checkMissingFieldFilters.

/**
 * Evaluates the filters that refer to struct fields which have no matching nested stream.
 *
 * <p>Filters with an empty path, and filters whose first path element names a field found
 * among {@code nestedStreams}, are ignored here. Nested stream field names are lower-cased
 * with {@link Locale#ENGLISH} before the lookup; the subfield name is used as is.
 *
 * @param nestedStreams descriptors of the nested streams that are present
 * @param filters filters keyed by the subfield they apply to
 * @return {@code true} if there are no filters or every filter on a missing field accepts
 *         null; {@code false} as soon as one such filter rejects null
 * @throws IllegalArgumentException if a filter on a missing field is not deterministic
 */
private boolean checkMissingFieldFilters(Collection<StreamDescriptor> nestedStreams, Map<Subfield, TupleDomainFilter> filters) {
    if (filters.isEmpty()) {
        return true;
    }

    Set<String> knownFields = nestedStreams.stream()
            .map(stream -> stream.getFieldName().toLowerCase(Locale.ENGLISH))
            .collect(toImmutableSet());

    for (Map.Entry<Subfield, TupleDomainFilter> filterEntry : filters.entrySet()) {
        Subfield target = filterEntry.getKey();
        // Nothing to verify when the filter has no path or its leading field is present.
        if (target.getPath().isEmpty()
                || knownFields.contains(((Subfield.NestedField) target.getPath().get(0)).getName())) {
            continue;
        }

        // The field is missing: a filter that accepts null lets every row through,
        // while a filter that rejects null lets no row through.
        TupleDomainFilter missingFieldFilter = filterEntry.getValue();
        checkArgument(missingFieldFilter.isDeterministic(), "Non-deterministic range filters are not supported yet");
        if (!missingFieldFilter.testNull()) {
            return false;
        }
    }
    return true;
}
Also used : DateTimeZone(org.joda.time.DateTimeZone) RunLengthEncodedBlock(com.facebook.presto.common.block.RunLengthEncodedBlock) BooleanInputStream(com.facebook.presto.orc.stream.BooleanInputStream) Preconditions.checkArgument(com.google.common.base.Preconditions.checkArgument) Locale(java.util.Locale) Map(java.util.Map) BlockLease(com.facebook.presto.common.block.BlockLease) ImmutableMap(com.google.common.collect.ImmutableMap) InputStreamSource(com.facebook.presto.orc.stream.InputStreamSource) Collection(java.util.Collection) Set(java.util.Set) SelectiveStreamReaders.initializeOutputPositions(com.facebook.presto.orc.reader.SelectiveStreamReaders.initializeOutputPositions) OrcRecordReaderOptions(com.facebook.presto.orc.OrcRecordReaderOptions) Preconditions.checkState(com.google.common.base.Preconditions.checkState) List(java.util.List) ImmutableMap.toImmutableMap(com.google.common.collect.ImmutableMap.toImmutableMap) ClassLayout(org.openjdk.jol.info.ClassLayout) SizeOf.sizeOf(io.airlift.slice.SizeOf.sizeOf) IS_NOT_NULL(com.facebook.presto.common.predicate.TupleDomainFilter.IS_NOT_NULL) InputStreamSources(com.facebook.presto.orc.stream.InputStreamSources) Optional(java.util.Optional) IS_NULL(com.facebook.presto.common.predicate.TupleDomainFilter.IS_NULL) MoreObjects.toStringHelper(com.google.common.base.MoreObjects.toStringHelper) StreamDescriptor(com.facebook.presto.orc.StreamDescriptor) Iterables(com.google.common.collect.Iterables) Field(com.facebook.presto.common.type.RowType.Field) RowBlock(com.facebook.presto.common.block.RowBlock) HashMap(java.util.HashMap) Function(java.util.function.Function) ArrayList(java.util.ArrayList) PRESENT(com.facebook.presto.orc.metadata.Stream.StreamKind.PRESENT) OrcLocalMemoryContext(com.facebook.presto.orc.OrcLocalMemoryContext) Subfield(com.facebook.presto.common.Subfield) ImmutableList(com.google.common.collect.ImmutableList) Verify.verify(com.google.common.base.Verify.verify) 
Objects.requireNonNull(java.util.Objects.requireNonNull) ImmutableSet.toImmutableSet(com.google.common.collect.ImmutableSet.toImmutableSet) Type(com.facebook.presto.common.type.Type) Stripe(com.facebook.presto.orc.Stripe) Nullable(javax.annotation.Nullable) MissingInputStreamSource.missingStreamSource(com.facebook.presto.orc.stream.MissingInputStreamSource.missingStreamSource) OrcAggregatedMemoryContext(com.facebook.presto.orc.OrcAggregatedMemoryContext) IOException(java.io.IOException) Maps(com.google.common.collect.Maps) ClosingBlockLease(com.facebook.presto.common.block.ClosingBlockLease) TupleDomainFilter(com.facebook.presto.common.predicate.TupleDomainFilter) Arrays.ensureCapacity(com.facebook.presto.common.array.Arrays.ensureCapacity) Block(com.facebook.presto.common.block.Block) RowType(com.facebook.presto.common.type.RowType) StreamDescriptor(com.facebook.presto.orc.StreamDescriptor) Map(java.util.Map) ImmutableMap(com.google.common.collect.ImmutableMap) ImmutableMap.toImmutableMap(com.google.common.collect.ImmutableMap.toImmutableMap) HashMap(java.util.HashMap) Subfield(com.facebook.presto.common.Subfield) TupleDomainFilter(com.facebook.presto.common.predicate.TupleDomainFilter)

Example 2 with StreamDescriptor

use of com.facebook.presto.orc.StreamDescriptor in project presto by prestodb.

In class LongDictionaryProvider, method getDictionary.

/**
 * Loads the dictionary for a stream, trying to reuse the supplied dictionary buffer.
 *
 * <p>If the stream has its own {@code DICTIONARY_DATA} stream, the dictionary is loaded from
 * it. Otherwise a shared dictionary, cached per stream id, is used; it is loaded on first
 * request from the descriptor duplicated with {@code DEFAULT_SEQUENCE_ID}.
 *
 * @param streamDescriptor descriptor indicating node and sequence of the stream reader the
 *        dictionary is associated with
 * @param dictionary dictionary buffer the method attempts to fill
 * @param items number of items expected in the dictionary
 * @return a {@code DictionaryResult} holding
 *         1) the final dictionary buffer, which differs from the input buffer when that buffer
 *            had to be expanded or when a shared dictionary is returned, and
 *         2) whether the caller owns the dictionary for memory accounting: callers own all
 *            non-shared dictionaries, and only the first caller of a shared dictionary owns it
 * @throws IOException if reading a dictionary stream fails
 * @throws IllegalStateException if the shared dictionary size differs from {@code items}
 */
public DictionaryResult getDictionary(StreamDescriptor streamDescriptor, long[] dictionary, int items) throws IOException {
    // A stream with its own dictionary data never goes through the shared cache.
    InputStreamSource<LongInputStream> ownDataSource = dictionaryStreamSources.getInputStreamSource(streamDescriptor, DICTIONARY_DATA, LongInputStream.class);
    if (ownDataSource.openStream() != null) {
        return loadDictionary(streamDescriptor, ownDataSource, dictionary, items);
    }

    // Fall back to the shared dictionary, loading and caching it on first use.
    int streamId = streamDescriptor.getStreamId();
    SharedDictionary cached = sharedDictionaries.get(streamId);
    boolean callerIsOwner = false;
    if (cached == null) {
        callerIsOwner = true;
        StreamDescriptor sharedDescriptor = streamDescriptor.duplicate(DEFAULT_SEQUENCE_ID);
        InputStreamSource<LongInputStream> sharedDataSource = dictionaryStreamSources.getInputStreamSource(sharedDescriptor, DICTIONARY_DATA, LongInputStream.class);
        long[] loadedValues = loadDictionary(streamDescriptor, sharedDataSource, dictionary, items).dictionaryBuffer();
        cached = new SharedDictionary(loadedValues, items);
        sharedDictionaries.put(streamId, cached);
    }

    checkState(cached.size == items, "Shared dictionary size mismatch for stream: %s", streamDescriptor);
    return new DictionaryResult(cached.values, callerIsOwner);
}
Also used : StreamDescriptor(com.facebook.presto.orc.StreamDescriptor) LongInputStream(com.facebook.presto.orc.stream.LongInputStream)

Example 3 with StreamDescriptor

use of com.facebook.presto.orc.StreamDescriptor in project presto by prestodb.

In class MapFlatBatchStreamReader, method startStripe.

/**
 * Resets per-stripe state and creates one value stream reader for each additional sequence
 * encoding of the base value stream.
 *
 * @param stripe the stripe about to be read
 * @throws IOException if starting a value stream reader fails
 */
@Override
public void startStripe(Stripe stripe) throws IOException {
    // Drop everything that belonged to the previous stripe.
    presentStreamSource = missingStreamSource(BooleanInputStream.class);
    inMapStreamSources.clear();
    valueStreamDescriptors.clear();
    valueStreamReaders.clear();

    // The encoding, or its additional sequence encodings, may be absent when every map is
    // empty or null; in that case there are no sequences to read.
    ColumnEncoding baseEncoding = stripe.getColumnEncodings().get(baseValueStreamDescriptor.getStreamId());
    SortedMap<Integer, DwrfSequenceEncoding> encodingsBySequence;
    if (baseEncoding == null || !baseEncoding.getAdditionalSequenceEncodings().isPresent()) {
        encodingsBySequence = Collections.emptySortedMap();
    }
    else {
        encodingsBySequence = baseEncoding.getAdditionalSequenceEncodings().get();
    }

    // The ColumnEncoding with sequence ID 0 doesn't have any data associated with it.
    for (int sequenceId : encodingsBySequence.keySet()) {
        inMapStreamSources.add(missingStreamSource(BooleanInputStream.class));

        StreamDescriptor sequenceDescriptor = copyStreamDescriptorWithSequence(baseValueStreamDescriptor, sequenceId);
        valueStreamDescriptors.add(sequenceDescriptor);

        BatchStreamReader sequenceReader = BatchStreamReaders.createStreamReader(type.getValueType(), sequenceDescriptor, hiveStorageTimeZone, options, systemMemoryContext);
        sequenceReader.startStripe(stripe);
        valueStreamReaders.add(sequenceReader);
    }

    keyBlockTemplate = getKeyBlockTemplate(encodingsBySequence.values());

    // Reading positions start over with the new stripe.
    presentStream = null;
    rowGroupOpen = false;
    readOffset = 0;
    nextBatchSize = 0;
}
Also used : ColumnEncoding(com.facebook.presto.orc.metadata.ColumnEncoding) BooleanInputStream(com.facebook.presto.orc.stream.BooleanInputStream) StreamDescriptor(com.facebook.presto.orc.StreamDescriptor) DwrfSequenceEncoding(com.facebook.presto.orc.metadata.DwrfSequenceEncoding)

Example 4 with StreamDescriptor

use of com.facebook.presto.orc.StreamDescriptor in project presto by prestodb.

In class SelectiveStreamReaders, method createStreamReader.

/**
 * Creates the selective stream reader that matches the ORC type kind of the given stream.
 *
 * <p>Readers for boolean, byte, integer-like, float, double, binary/string and timestamp
 * kinds require {@code requiredSubfields} to be empty. LIST, STRUCT and MAP readers receive
 * the filters and required subfields unchanged. For DECIMAL the reader is chosen by comparing
 * the ORC type precision with {@code MAX_SHORT_PRECISION}.
 *
 * @param streamDescriptor descriptor of the stream to read
 * @param filters filters keyed by subfield
 * @param outputType type to produce, if output is requested
 * @param requiredSubfields subfields requested from the stream
 * @param hiveStorageTimeZone time zone handed to the timestamp and nested readers
 * @param options reader options handed to the timestamp and nested readers
 * @param legacyMapSubscript flag handed to the nested (list, struct, map) readers
 * @param systemMemoryContext memory context handed to, or used to derive the context of, the reader
 * @return the reader for the stream
 * @throws IllegalArgumentException if subfields are requested from a reader that does not
 *         support them, or if the type kind is UNION or otherwise unsupported
 */
public static SelectiveStreamReader createStreamReader(StreamDescriptor streamDescriptor, Map<Subfield, TupleDomainFilter> filters, Optional<Type> outputType, List<Subfield> requiredSubfields, DateTimeZone hiveStorageTimeZone, OrcRecordReaderOptions options, boolean legacyMapSubscript, OrcAggregatedMemoryContext systemMemoryContext) {
    // Name given to every local memory context created below.
    String contextName = SelectiveStreamReaders.class.getSimpleName();
    OrcTypeKind type = streamDescriptor.getOrcTypeKind();
    switch (type) {
        case BOOLEAN:
            checkArgument(requiredSubfields.isEmpty(), "Boolean stream reader doesn't support subfields");
            verifyStreamType(streamDescriptor, outputType, t -> t instanceof BooleanType);
            return new BooleanSelectiveStreamReader(streamDescriptor, getOptionalOnlyFilter(type, filters), outputType.isPresent(), systemMemoryContext.newOrcLocalMemoryContext(contextName));
        case BYTE:
            checkArgument(requiredSubfields.isEmpty(), "Byte stream reader doesn't support subfields");
            verifyStreamType(streamDescriptor, outputType, t -> t instanceof TinyintType);
            return new ByteSelectiveStreamReader(streamDescriptor, getOptionalOnlyFilter(type, filters), outputType.isPresent(), systemMemoryContext.newOrcLocalMemoryContext(contextName));
        case SHORT:
        case INT:
        case LONG:
        case DATE:
            checkArgument(requiredSubfields.isEmpty(), "Primitive type stream reader doesn't support subfields");
            verifyStreamType(streamDescriptor, outputType, t -> t instanceof BigintType || t instanceof IntegerType || t instanceof SmallintType || t instanceof DateType);
            return new LongSelectiveStreamReader(streamDescriptor, getOptionalOnlyFilter(type, filters), outputType, systemMemoryContext);
        case FLOAT:
            checkArgument(requiredSubfields.isEmpty(), "Float type stream reader doesn't support subfields");
            verifyStreamType(streamDescriptor, outputType, t -> t instanceof RealType);
            return new FloatSelectiveStreamReader(streamDescriptor, getOptionalOnlyFilter(type, filters), outputType.isPresent(), systemMemoryContext.newOrcLocalMemoryContext(contextName));
        case DOUBLE:
            checkArgument(requiredSubfields.isEmpty(), "Double stream reader doesn't support subfields");
            verifyStreamType(streamDescriptor, outputType, t -> t instanceof DoubleType);
            return new DoubleSelectiveStreamReader(streamDescriptor, getOptionalOnlyFilter(type, filters), outputType.isPresent(), systemMemoryContext.newOrcLocalMemoryContext(contextName));
        case BINARY:
        case STRING:
        case VARCHAR:
        case CHAR:
            checkArgument(requiredSubfields.isEmpty(), "Primitive stream reader doesn't support subfields");
            verifyStreamType(streamDescriptor, outputType, t -> t instanceof VarcharType || t instanceof CharType || t instanceof VarbinaryType);
            return new SliceSelectiveStreamReader(streamDescriptor, getOptionalOnlyFilter(type, filters), outputType, systemMemoryContext);
        case TIMESTAMP:
            checkArgument(requiredSubfields.isEmpty(), "Timestamp stream reader doesn't support subfields");
            verifyStreamType(streamDescriptor, outputType, t -> t instanceof TimestampType);
            return new TimestampSelectiveStreamReader(streamDescriptor, getOptionalOnlyFilter(type, filters), hiveStorageTimeZone, outputType.isPresent(), systemMemoryContext.newOrcLocalMemoryContext(contextName), options);
        case LIST:
            verifyStreamType(streamDescriptor, outputType, t -> t instanceof ArrayType);
            return new ListSelectiveStreamReader(streamDescriptor, filters, requiredSubfields, null, 0, outputType, hiveStorageTimeZone, options, legacyMapSubscript, systemMemoryContext);
        case STRUCT:
            verifyStreamType(streamDescriptor, outputType, t -> t instanceof RowType);
            return new StructSelectiveStreamReader(streamDescriptor, filters, requiredSubfields, outputType, hiveStorageTimeZone, options, legacyMapSubscript, systemMemoryContext);
        case MAP:
            verifyStreamType(streamDescriptor, outputType, t -> t instanceof MapType);
            return new MapSelectiveStreamReader(streamDescriptor, filters, requiredSubfields, outputType, hiveStorageTimeZone, options, legacyMapSubscript, systemMemoryContext);
        case DECIMAL:
            verifyStreamType(streamDescriptor, outputType, t -> t instanceof DecimalType);
            // Precision up to MAX_SHORT_PRECISION gets the short decimal reader, anything above the long one.
            if (streamDescriptor.getOrcType().getPrecision().get() <= MAX_SHORT_PRECISION) {
                return new ShortDecimalSelectiveStreamReader(streamDescriptor, getOptionalOnlyFilter(type, filters), outputType, systemMemoryContext.newOrcLocalMemoryContext(contextName));
            }
            return new LongDecimalSelectiveStreamReader(streamDescriptor, getOptionalOnlyFilter(type, filters), outputType, systemMemoryContext.newOrcLocalMemoryContext(contextName));
        case UNION:
        default:
            throw new IllegalArgumentException("Unsupported type: " + type);
    }
}
Also used : StreamDescriptor(com.facebook.presto.orc.StreamDescriptor) Iterables(com.google.common.collect.Iterables) DateTimeZone(org.joda.time.DateTimeZone) MapType(com.facebook.presto.common.type.MapType) DecimalType(com.facebook.presto.common.type.DecimalType) BooleanType(com.facebook.presto.common.type.BooleanType) OrcTypeKind(com.facebook.presto.orc.metadata.OrcType.OrcTypeKind) MAX_SHORT_PRECISION(com.facebook.presto.common.type.Decimals.MAX_SHORT_PRECISION) Preconditions.checkArgument(com.google.common.base.Preconditions.checkArgument) IntegerType(com.facebook.presto.common.type.IntegerType) Subfield(com.facebook.presto.common.Subfield) Map(java.util.Map) ArrayType(com.facebook.presto.common.type.ArrayType) CharType(com.facebook.presto.common.type.CharType) Type(com.facebook.presto.common.type.Type) TinyintType(com.facebook.presto.common.type.TinyintType) OrcAggregatedMemoryContext(com.facebook.presto.orc.OrcAggregatedMemoryContext) ImmutableMap(com.google.common.collect.ImmutableMap) Predicate(java.util.function.Predicate) BigintType(com.facebook.presto.common.type.BigintType) OrcRecordReaderOptions(com.facebook.presto.orc.OrcRecordReaderOptions) VarcharType(com.facebook.presto.common.type.VarcharType) RealType(com.facebook.presto.common.type.RealType) String.format(java.lang.String.format) TupleDomainFilter(com.facebook.presto.common.predicate.TupleDomainFilter) SmallintType(com.facebook.presto.common.type.SmallintType) List(java.util.List) VarbinaryType(com.facebook.presto.common.type.VarbinaryType) Optional(java.util.Optional) Arrays.ensureCapacity(com.facebook.presto.common.array.Arrays.ensureCapacity) DateType(com.facebook.presto.common.type.DateType) DoubleType(com.facebook.presto.common.type.DoubleType) RowType(com.facebook.presto.common.type.RowType) TimestampType(com.facebook.presto.common.type.TimestampType) VarcharType(com.facebook.presto.common.type.VarcharType) RowType(com.facebook.presto.common.type.RowType) 
OrcTypeKind(com.facebook.presto.orc.metadata.OrcType.OrcTypeKind) MapType(com.facebook.presto.common.type.MapType) ArrayType(com.facebook.presto.common.type.ArrayType) VarbinaryType(com.facebook.presto.common.type.VarbinaryType) SmallintType(com.facebook.presto.common.type.SmallintType) DateType(com.facebook.presto.common.type.DateType) BigintType(com.facebook.presto.common.type.BigintType) IntegerType(com.facebook.presto.common.type.IntegerType) DoubleType(com.facebook.presto.common.type.DoubleType) CharType(com.facebook.presto.common.type.CharType)

Example 5 with StreamDescriptor

use of com.facebook.presto.orc.StreamDescriptor in project presto by prestodb.

In class MapFlatSelectiveStreamReader, method startStripe.

/**
 * Resets per-stripe state and creates a selective value stream reader for every additional
 * sequence encoding whose key is required.
 *
 * <p>For each required key, its position among all additional sequence encodings is
 * recorded in {@code keyIndices}, and {@code keyCount} ends up as the number of required keys.
 *
 * @param stripe the stripe about to be read
 * @throws IOException if starting a value stream reader fails
 */
@Override
public void startStripe(Stripe stripe) throws IOException {
    // Drop everything that belonged to the previous stripe.
    presentStreamSource = missingStreamSource(BooleanInputStream.class);
    inMapStreamSources.clear();
    valueStreamDescriptors.clear();
    valueStreamReaders.clear();

    // The encoding, or its additional sequence encodings, may be absent when every map is
    // empty or null; in that case there are no sequences to read.
    ColumnEncoding baseEncoding = stripe.getColumnEncodings().get(baseValueStreamDescriptor.getStreamId());
    SortedMap<Integer, DwrfSequenceEncoding> encodingsBySequence;
    if (baseEncoding == null || !baseEncoding.getAdditionalSequenceEncodings().isPresent()) {
        encodingsBySequence = Collections.emptySortedMap();
    }
    else {
        encodingsBySequence = baseEncoding.getAdditionalSequenceEncodings().get();
    }

    keyIndices = ensureCapacity(keyIndices, encodingsBySequence.size());
    keyCount = 0;

    // The ColumnEncoding with sequence ID 0 doesn't have any data associated with it.
    int nextPosition = 0;
    for (Map.Entry<Integer, DwrfSequenceEncoding> sequenceEntry : encodingsBySequence.entrySet()) {
        // Every entry consumes one position, whether or not its key is required.
        int position = nextPosition++;
        if (!isRequiredKey(sequenceEntry.getValue())) {
            continue;
        }
        keyIndices[keyCount] = position;
        keyCount++;

        int sequenceId = sequenceEntry.getKey();
        inMapStreamSources.add(missingStreamSource(BooleanInputStream.class));

        StreamDescriptor sequenceDescriptor = copyStreamDescriptorWithSequence(baseValueStreamDescriptor, sequenceId);
        valueStreamDescriptors.add(sequenceDescriptor);

        SelectiveStreamReader sequenceReader = SelectiveStreamReaders.createStreamReader(
                sequenceDescriptor,
                ImmutableBiMap.of(),
                Optional.ofNullable(outputType).map(MapType::getValueType),
                ImmutableList.of(),
                hiveStorageTimeZone,
                options,
                legacyMapSubscript,
                systemMemoryContext.newOrcAggregatedMemoryContext());
        sequenceReader.startStripe(stripe);
        valueStreamReaders.add(sequenceReader);
    }

    keyBlock = getKeysBlock(ImmutableList.copyOf(encodingsBySequence.values()));

    // Reading positions start over with the new stripe.
    presentStream = null;
    rowGroupOpen = false;
    readOffset = 0;
}
Also used : ColumnEncoding(com.facebook.presto.orc.metadata.ColumnEncoding) BooleanInputStream(com.facebook.presto.orc.stream.BooleanInputStream) Map(java.util.Map) SortedMap(java.util.SortedMap) ImmutableBiMap(com.google.common.collect.ImmutableBiMap) StreamDescriptor(com.facebook.presto.orc.StreamDescriptor) DwrfSequenceEncoding(com.facebook.presto.orc.metadata.DwrfSequenceEncoding)

Aggregations

StreamDescriptor (com.facebook.presto.orc.StreamDescriptor)5 BooleanInputStream (com.facebook.presto.orc.stream.BooleanInputStream)3 Subfield (com.facebook.presto.common.Subfield)2 Arrays.ensureCapacity (com.facebook.presto.common.array.Arrays.ensureCapacity)2 TupleDomainFilter (com.facebook.presto.common.predicate.TupleDomainFilter)2 RowType (com.facebook.presto.common.type.RowType)2 Type (com.facebook.presto.common.type.Type)2 OrcAggregatedMemoryContext (com.facebook.presto.orc.OrcAggregatedMemoryContext)2 OrcRecordReaderOptions (com.facebook.presto.orc.OrcRecordReaderOptions)2 ColumnEncoding (com.facebook.presto.orc.metadata.ColumnEncoding)2 DwrfSequenceEncoding (com.facebook.presto.orc.metadata.DwrfSequenceEncoding)2 Preconditions.checkArgument (com.google.common.base.Preconditions.checkArgument)2 ImmutableMap (com.google.common.collect.ImmutableMap)2 Iterables (com.google.common.collect.Iterables)2 Map (java.util.Map)2 Block (com.facebook.presto.common.block.Block)1 BlockLease (com.facebook.presto.common.block.BlockLease)1 ClosingBlockLease (com.facebook.presto.common.block.ClosingBlockLease)1 RowBlock (com.facebook.presto.common.block.RowBlock)1 RunLengthEncodedBlock (com.facebook.presto.common.block.RunLengthEncodedBlock)1