use of com.facebook.presto.orc.StreamDescriptor in project presto by prestodb.
the class StructSelectiveStreamReader method checkMissingFieldFilters.
/**
 * Evaluates the filters that target fields which are not among the given nested streams.
 *
 * <p>A filter is considered to target a missing field when the name of the first element of
 * its subfield path is not contained in the set of nested stream field names. The stream field
 * names are lower-cased (using {@link Locale#ENGLISH}) before the lookup; the name taken from
 * the subfield is looked up as-is. Filters whose subfield path is empty are ignored.
 *
 * @param nestedStreams descriptors of the nested streams that are present
 * @param filters filters keyed by the subfield they apply to
 * @return {@code true} if there are no filters, or if every filter on a missing field accepts
 * nulls; {@code false} as soon as one filter on a missing field rejects nulls
 */
private boolean checkMissingFieldFilters(Collection<StreamDescriptor> nestedStreams, Map<Subfield, TupleDomainFilter> filters) {
    if (filters.isEmpty()) {
        return true;
    }

    Set<String> presentFieldNames = nestedStreams.stream()
            .map(descriptor -> descriptor.getFieldName().toLowerCase(Locale.ENGLISH))
            .collect(toImmutableSet());

    for (Map.Entry<Subfield, TupleDomainFilter> filterEntry : filters.entrySet()) {
        Subfield target = filterEntry.getKey();
        if (!target.getPath().isEmpty()) {
            // The first path element is expected to be a nested field; the cast fails otherwise.
            String targetFieldName = ((Subfield.NestedField) target.getPath().get(0)).getName();
            if (!presentFieldNames.contains(targetFieldName)) {
                // The field is missing: if the filter allows nulls, then all rows pass,
                // otherwise, no row passes.
                TupleDomainFilter missingFieldFilter = filterEntry.getValue();
                checkArgument(missingFieldFilter.isDeterministic(), "Non-deterministic range filters are not supported yet");
                if (!missingFieldFilter.testNull()) {
                    return false;
                }
            }
        }
    }
    return true;
}
use of com.facebook.presto.orc.StreamDescriptor in project presto by prestodb.
the class LongDictionaryProvider method getDictionary.
/**
 * Loads the long dictionary for a stream, trying to reuse the dictionary buffer passed in.
 *
 * <p>If the dictionary data stream for {@code streamDescriptor} can be opened, the dictionary
 * is loaded from it directly. Otherwise a shared dictionary, keyed by the stream id, is used:
 * it is loaded from the stream of the descriptor duplicated with {@code DEFAULT_SEQUENCE_ID}
 * the first time it is requested and served from the cache afterwards.
 *
 * @param streamDescriptor descriptor indicating node and sequence of the stream reader
 * the dictionary is associated with.
 * @param dictionary dictionary buffer the method attempts to fill.
 * @param items number of items expected in the dictionary.
 * @return The DictionaryResult contains two parts:
 * 1) the final dictionary buffer object. Different from the input dictionary buffer if the input
 * dictionary buffer is expanded or that the method returns a shared dictionary.
 * 2) whether the caller will be the owner of the dictionary, for the purpose of memory accounting.
 * Callers own all non-shared dictionaries, and only the first caller of the shared dictionary
 * is the owner.
 * @throws IOException propagated from the stream access and dictionary loading calls made here
 */
public DictionaryResult getDictionary(StreamDescriptor streamDescriptor, long[] dictionary, int items) throws IOException {
    // A descriptor whose own dictionary data stream opens gets a non-shared dictionary.
    InputStreamSource<LongInputStream> ownDataSource = dictionaryStreamSources.getInputStreamSource(streamDescriptor, DICTIONARY_DATA, LongInputStream.class);
    if (ownDataSource.openStream() != null) {
        return loadDictionary(streamDescriptor, ownDataSource, dictionary, items);
    }

    // Otherwise use the shared dictionary registered under the stream id, loading it on first use.
    int streamId = streamDescriptor.getStreamId();
    SharedDictionary shared = sharedDictionaries.get(streamId);
    boolean loadedByThisCall = false;
    if (shared == null) {
        loadedByThisCall = true;
        StreamDescriptor defaultSequenceDescriptor = streamDescriptor.duplicate(DEFAULT_SEQUENCE_ID);
        InputStreamSource<LongInputStream> sharedDataSource = dictionaryStreamSources.getInputStreamSource(defaultSequenceDescriptor, DICTIONARY_DATA, LongInputStream.class);
        long[] sharedBuffer = loadDictionary(streamDescriptor, sharedDataSource, dictionary, items).dictionaryBuffer();
        shared = new SharedDictionary(sharedBuffer, items);
        sharedDictionaries.put(streamId, shared);
    }

    // Every user of a shared dictionary must expect the same number of items.
    checkState(shared.size == items, "Shared dictionary size mismatch for stream: %s", streamDescriptor);
    return new DictionaryResult(shared.values, loadedByThisCall);
}
use of com.facebook.presto.orc.StreamDescriptor in project presto by prestodb.
the class MapFlatBatchStreamReader method startStripe.
/**
 * Resets the per-stripe state of this reader and creates one value stream reader for each
 * additional sequence encoding recorded for the base value stream in the given stripe.
 *
 * @param stripe the stripe that is about to be read
 * @throws IOException propagated from starting the stripe on the value stream readers
 */
@Override
public void startStripe(Stripe stripe) throws IOException {
    presentStreamSource = missingStreamSource(BooleanInputStream.class);
    inMapStreamSources.clear();
    valueStreamDescriptors.clear();
    valueStreamReaders.clear();

    // encoding or encoding.getAdditionalSequenceEncodings() may not be present when every map
    // is empty or null; in that case there are no sequences to read
    ColumnEncoding baseEncoding = stripe.getColumnEncodings().get(baseValueStreamDescriptor.getStreamId());
    SortedMap<Integer, DwrfSequenceEncoding> sequenceEncodings;
    if (baseEncoding == null || !baseEncoding.getAdditionalSequenceEncodings().isPresent()) {
        sequenceEncodings = Collections.emptySortedMap();
    } else {
        sequenceEncodings = baseEncoding.getAdditionalSequenceEncodings().get();
    }

    // The ColumnEncoding with sequence ID 0 doesn't have any data associated with it
    for (int sequenceId : sequenceEncodings.keySet()) {
        inMapStreamSources.add(missingStreamSource(BooleanInputStream.class));

        StreamDescriptor sequenceDescriptor = copyStreamDescriptorWithSequence(baseValueStreamDescriptor, sequenceId);
        valueStreamDescriptors.add(sequenceDescriptor);

        BatchStreamReader sequenceReader = BatchStreamReaders.createStreamReader(type.getValueType(), sequenceDescriptor, hiveStorageTimeZone, options, systemMemoryContext);
        sequenceReader.startStripe(stripe);
        valueStreamReaders.add(sequenceReader);
    }

    keyBlockTemplate = getKeyBlockTemplate(sequenceEncodings.values());

    // Reset the read position and row group state for the new stripe.
    readOffset = 0;
    nextBatchSize = 0;
    presentStream = null;
    rowGroupOpen = false;
}
use of com.facebook.presto.orc.StreamDescriptor in project presto by prestodb.
the class SelectiveStreamReaders method createStreamReader.
/**
 * Creates the selective stream reader that corresponds to the ORC type kind of the stream.
 *
 * <p>For every supported kind the output type is first checked with {@code verifyStreamType}.
 * The BOOLEAN, BYTE, SHORT, INT, LONG, DATE, FLOAT, DOUBLE, BINARY, STRING, VARCHAR, CHAR and
 * TIMESTAMP kinds additionally require {@code requiredSubfields} to be empty and receive the
 * result of {@code getOptionalOnlyFilter}. LIST, STRUCT and MAP readers receive the filters and
 * required subfields unchanged. DECIMAL picks the short or long decimal reader by comparing the
 * ORC type's precision with {@code MAX_SHORT_PRECISION}; note that it performs no
 * {@code requiredSubfields} check here.
 *
 * @param streamDescriptor descriptor of the stream to read
 * @param filters filters keyed by subfield
 * @param outputType type to produce; its presence is forwarded to several readers as a flag
 * @param requiredSubfields subfields requested from the stream
 * @param hiveStorageTimeZone time zone forwarded to the timestamp and complex type readers
 * @param options reader options forwarded to the timestamp and complex type readers
 * @param legacyMapSubscript flag forwarded to the complex type readers
 * @param systemMemoryContext memory context handed to the reader, either directly or as a new
 * local context derived from it
 * @return the reader for the stream
 * @throws IllegalArgumentException for UNION and any other unsupported type kind
 */
public static SelectiveStreamReader createStreamReader(StreamDescriptor streamDescriptor, Map<Subfield, TupleDomainFilter> filters, Optional<Type> outputType, List<Subfield> requiredSubfields, DateTimeZone hiveStorageTimeZone, OrcRecordReaderOptions options, boolean legacyMapSubscript, OrcAggregatedMemoryContext systemMemoryContext) {
    // Name given to every local memory context created below.
    String memoryContextName = SelectiveStreamReaders.class.getSimpleName();
    OrcTypeKind kind = streamDescriptor.getOrcTypeKind();
    switch (kind) {
        case BOOLEAN:
            checkArgument(requiredSubfields.isEmpty(), "Boolean stream reader doesn't support subfields");
            verifyStreamType(streamDescriptor, outputType, BooleanType.class::isInstance);
            return new BooleanSelectiveStreamReader(
                    streamDescriptor,
                    getOptionalOnlyFilter(kind, filters),
                    outputType.isPresent(),
                    systemMemoryContext.newOrcLocalMemoryContext(memoryContextName));
        case BYTE:
            checkArgument(requiredSubfields.isEmpty(), "Byte stream reader doesn't support subfields");
            verifyStreamType(streamDescriptor, outputType, TinyintType.class::isInstance);
            return new ByteSelectiveStreamReader(
                    streamDescriptor,
                    getOptionalOnlyFilter(kind, filters),
                    outputType.isPresent(),
                    systemMemoryContext.newOrcLocalMemoryContext(memoryContextName));
        case SHORT:
        case INT:
        case LONG:
        case DATE:
            checkArgument(requiredSubfields.isEmpty(), "Primitive type stream reader doesn't support subfields");
            verifyStreamType(streamDescriptor, outputType, candidate -> candidate instanceof BigintType || candidate instanceof IntegerType || candidate instanceof SmallintType || candidate instanceof DateType);
            return new LongSelectiveStreamReader(streamDescriptor, getOptionalOnlyFilter(kind, filters), outputType, systemMemoryContext);
        case FLOAT:
            checkArgument(requiredSubfields.isEmpty(), "Float type stream reader doesn't support subfields");
            verifyStreamType(streamDescriptor, outputType, RealType.class::isInstance);
            return new FloatSelectiveStreamReader(
                    streamDescriptor,
                    getOptionalOnlyFilter(kind, filters),
                    outputType.isPresent(),
                    systemMemoryContext.newOrcLocalMemoryContext(memoryContextName));
        case DOUBLE:
            checkArgument(requiredSubfields.isEmpty(), "Double stream reader doesn't support subfields");
            verifyStreamType(streamDescriptor, outputType, DoubleType.class::isInstance);
            return new DoubleSelectiveStreamReader(
                    streamDescriptor,
                    getOptionalOnlyFilter(kind, filters),
                    outputType.isPresent(),
                    systemMemoryContext.newOrcLocalMemoryContext(memoryContextName));
        case BINARY:
        case STRING:
        case VARCHAR:
        case CHAR:
            checkArgument(requiredSubfields.isEmpty(), "Primitive stream reader doesn't support subfields");
            verifyStreamType(streamDescriptor, outputType, candidate -> candidate instanceof VarcharType || candidate instanceof CharType || candidate instanceof VarbinaryType);
            return new SliceSelectiveStreamReader(streamDescriptor, getOptionalOnlyFilter(kind, filters), outputType, systemMemoryContext);
        case TIMESTAMP:
            checkArgument(requiredSubfields.isEmpty(), "Timestamp stream reader doesn't support subfields");
            verifyStreamType(streamDescriptor, outputType, TimestampType.class::isInstance);
            return new TimestampSelectiveStreamReader(
                    streamDescriptor,
                    getOptionalOnlyFilter(kind, filters),
                    hiveStorageTimeZone,
                    outputType.isPresent(),
                    systemMemoryContext.newOrcLocalMemoryContext(memoryContextName),
                    options);
        case LIST:
            verifyStreamType(streamDescriptor, outputType, ArrayType.class::isInstance);
            return new ListSelectiveStreamReader(streamDescriptor, filters, requiredSubfields, null, 0, outputType, hiveStorageTimeZone, options, legacyMapSubscript, systemMemoryContext);
        case STRUCT:
            verifyStreamType(streamDescriptor, outputType, RowType.class::isInstance);
            return new StructSelectiveStreamReader(streamDescriptor, filters, requiredSubfields, outputType, hiveStorageTimeZone, options, legacyMapSubscript, systemMemoryContext);
        case MAP:
            verifyStreamType(streamDescriptor, outputType, MapType.class::isInstance);
            return new MapSelectiveStreamReader(streamDescriptor, filters, requiredSubfields, outputType, hiveStorageTimeZone, options, legacyMapSubscript, systemMemoryContext);
        case DECIMAL:
            verifyStreamType(streamDescriptor, outputType, DecimalType.class::isInstance);
            // The precision decides between the short and the long decimal reader.
            if (streamDescriptor.getOrcType().getPrecision().get() <= MAX_SHORT_PRECISION) {
                return new ShortDecimalSelectiveStreamReader(
                        streamDescriptor,
                        getOptionalOnlyFilter(kind, filters),
                        outputType,
                        systemMemoryContext.newOrcLocalMemoryContext(memoryContextName));
            }
            return new LongDecimalSelectiveStreamReader(
                    streamDescriptor,
                    getOptionalOnlyFilter(kind, filters),
                    outputType,
                    systemMemoryContext.newOrcLocalMemoryContext(memoryContextName));
        case UNION:
        default:
            throw new IllegalArgumentException("Unsupported type: " + kind);
    }
}
use of com.facebook.presto.orc.StreamDescriptor in project presto by prestodb.
the class MapFlatSelectiveStreamReader method startStripe.
/**
 * Resets the per-stripe state of this reader and creates one selective value stream reader for
 * each additional sequence encoding of the base value stream whose key is required.
 *
 * <p>For every required key, its position within the iteration over all additional sequence
 * encodings is recorded in {@code keyIndices}, and {@code keyCount} ends up as the number of
 * required keys.
 *
 * @param stripe the stripe that is about to be read
 * @throws IOException propagated from starting the stripe on the value stream readers
 */
@Override
public void startStripe(Stripe stripe) throws IOException {
    presentStreamSource = missingStreamSource(BooleanInputStream.class);
    inMapStreamSources.clear();
    valueStreamDescriptors.clear();
    valueStreamReaders.clear();

    // encoding or encoding.getAdditionalSequenceEncodings() may not be present when every map
    // is empty or null; in that case there are no sequences to read
    ColumnEncoding baseEncoding = stripe.getColumnEncodings().get(baseValueStreamDescriptor.getStreamId());
    SortedMap<Integer, DwrfSequenceEncoding> sequenceEncodings;
    if (baseEncoding == null || !baseEncoding.getAdditionalSequenceEncodings().isPresent()) {
        sequenceEncodings = Collections.emptySortedMap();
    } else {
        sequenceEncodings = baseEncoding.getAdditionalSequenceEncodings().get();
    }

    keyIndices = ensureCapacity(keyIndices, sequenceEncodings.size());
    keyCount = 0;

    // The ColumnEncoding with sequence ID 0 doesn't have any data associated with it
    int nextPosition = 0;
    for (Map.Entry<Integer, DwrfSequenceEncoding> sequenceEntry : sequenceEncodings.entrySet()) {
        // Every entry consumes a position, whether or not its key is required.
        int position = nextPosition;
        nextPosition++;
        if (!isRequiredKey(sequenceEntry.getValue())) {
            continue;
        }

        keyIndices[keyCount] = position;
        keyCount++;

        int sequenceId = sequenceEntry.getKey();
        inMapStreamSources.add(missingStreamSource(BooleanInputStream.class));

        StreamDescriptor sequenceDescriptor = copyStreamDescriptorWithSequence(baseValueStreamDescriptor, sequenceId);
        valueStreamDescriptors.add(sequenceDescriptor);

        // Value readers are created without filters and without required subfields.
        SelectiveStreamReader sequenceReader = SelectiveStreamReaders.createStreamReader(
                sequenceDescriptor,
                ImmutableBiMap.of(),
                Optional.ofNullable(outputType).map(MapType::getValueType),
                ImmutableList.of(),
                hiveStorageTimeZone,
                options,
                legacyMapSubscript,
                systemMemoryContext.newOrcAggregatedMemoryContext());
        sequenceReader.startStripe(stripe);
        valueStreamReaders.add(sequenceReader);
    }

    keyBlock = getKeysBlock(ImmutableList.copyOf(sequenceEncodings.values()));

    // Reset the read position and row group state for the new stripe.
    readOffset = 0;
    presentStream = null;
    rowGroupOpen = false;
}
Aggregations