Example 1 with MutateHelper

Use of org.apache.hadoop.hive.common.io.DiskRangeList.MutateHelper in the Apache Hive project.

From the class LowLevelCacheImpl, method getFileData:

@Override
public DiskRangeList getFileData(Object fileKey, DiskRangeList ranges, long baseOffset, DiskRangeListFactory factory, LowLevelCacheCounters qfCounters, BooleanRef gotAllData) {
    if (ranges == null)
        return null;
    DiskRangeList prev = ranges.prev;
    FileCache<ConcurrentSkipListMap<Long, LlapDataBuffer>> subCache = cache.get(fileKey);
    if (subCache == null || !subCache.incRef()) {
        long totalMissed = ranges.getTotalLength();
        metrics.incrCacheRequestedBytes(totalMissed);
        if (qfCounters != null) {
            qfCounters.recordCacheMiss(totalMissed);
        }
        if (prev != null && gotAllData != null) {
            gotAllData.value = false;
        }
        return ranges;
    }
    try {
        if (prev == null) {
            prev = new MutateHelper(ranges);
        }
        if (gotAllData != null) {
            gotAllData.value = true;
        }
        DiskRangeList current = ranges;
        while (current != null) {
            metrics.incrCacheRequestedBytes(current.getLength());
            // We assume ranges in "ranges" are non-overlapping; thus, we will save next in advance.
            DiskRangeList next = current.next;
            getOverlappingRanges(baseOffset, current, subCache.getCache(), factory, gotAllData);
            current = next;
        }
    } finally {
        subCache.decRef();
    }
    boolean isInvalid = false;
    if (qfCounters != null) {
        DiskRangeList current = prev.next;
        long bytesHit = 0, bytesMissed = 0;
        while (current != null) {
            // This assumes no ranges passed to cache to fetch have data beforehand.
            if (current.hasData()) {
                bytesHit += current.getLength();
            } else {
                if (gotAllData.value) {
                    isInvalid = true;
                }
                bytesMissed += current.getLength();
            }
            current = current.next;
        }
        qfCounters.recordCacheHit(bytesHit);
        qfCounters.recordCacheMiss(bytesMissed);
    } else if (gotAllData != null && gotAllData.value) {
        DiskRangeList current = prev.next;
        while (current != null) {
            if (!current.hasData()) {
                isInvalid = true;
                break;
            }
            current = current.next;
        }
    }
    if (isInvalid) {
        StringBuilder invalidMsg = new StringBuilder("Internal error - gotAllData=true but the resulting ranges are ").append(RecordReaderUtils.stringifyDiskRanges(prev.next));
        subCache = cache.get(fileKey);
        if (subCache != null && subCache.incRef()) {
            try {
                invalidMsg.append("; cache ranges (not necessarily consistent) are ");
                for (Map.Entry<Long, LlapDataBuffer> e : subCache.getCache().entrySet()) {
                    long start = e.getKey(), end = start + e.getValue().declaredCachedLength;
                    invalidMsg.append("[").append(start).append(", ").append(end).append("), ");
                }
            } finally {
                subCache.decRef();
            }
        } else {
            invalidMsg.append("; cache ranges can no longer be determined");
        }
        String s = invalidMsg.toString();
        LlapIoImpl.LOG.error(s);
        throw new RuntimeException(s);
    }
    return prev.next;
}
Also used: ConcurrentSkipListMap (java.util.concurrent.ConcurrentSkipListMap), DiskRangeList (org.apache.hadoop.hive.common.io.DiskRangeList), MutateHelper (org.apache.hadoop.hive.common.io.DiskRangeList.MutateHelper), ConcurrentHashMap (java.util.concurrent.ConcurrentHashMap), Map (java.util.Map)
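
The interesting MutateHelper detail in getFileData is the fallback prev = new MutateHelper(ranges): when the requested list has no real predecessor, a synthetic head node is attached so the cache code can freely split or replace the first range and the caller can still recover the (possibly different) first element via prev.next. Below is a minimal, self-contained sketch of that pattern; Range, FakeHead and replaceSelfWith are hypothetical stand-ins that only model the linking behavior, not the actual DiskRangeList API.

class Range {
    long offset, end;
    Range prev, next;

    Range(long offset, long end) {
        this.offset = offset;
        this.end = end;
    }

    // Splice 'other' into the list in place of this node.
    void replaceSelfWith(Range other) {
        other.prev = prev;
        other.next = next;
        if (prev != null) prev.next = other;
        if (next != null) next.prev = other;
        prev = next = null;
    }
}

// Synthetic head: not a real range, it only anchors the list so that
// replacing the first element never loses the list head for the caller.
class FakeHead extends Range {
    FakeHead(Range head) {
        super(-1, -1);
        next = head;
        head.prev = this;
    }
}

public class MutateHelperSketch {
    public static void main(String[] args) {
        Range first = new Range(0, 100);
        Range second = new Range(100, 200);
        first.next = second;
        second.prev = first;

        // Same role as "prev = new MutateHelper(ranges)" above.
        FakeHead anchor = new FakeHead(first);

        // Cache code replaces the first range, e.g. with a cached-buffer range.
        first.replaceSelfWith(new Range(0, 50));

        // The caller still reaches the updated head through the anchor.
        System.out.println("new head: [" + anchor.next.offset + ", " + anchor.next.end + ")");
    }
}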

Example 2 with MutateHelper

Use of org.apache.hadoop.hive.common.io.DiskRangeList.MutateHelper in the Apache Hive project.

From the class EncodedReaderImpl, method preReadDataRanges:

@Override
public void preReadDataRanges(DiskRangeList ranges) throws IOException {
    boolean hasFileId = this.fileKey != null;
    long baseOffset = 0L;
    // 2. Now, read all of the ranges from cache or disk.
    IdentityHashMap<ByteBuffer, Boolean> toRelease = new IdentityHashMap<>();
    MutateHelper toRead = getDataFromCacheAndDisk(ranges, 0, hasFileId, toRelease);
    // 3. For uncompressed case, we need some special processing before read.
    preReadUncompressedStreams(baseOffset, toRead, toRelease);
    // 4. Decompress the data.
    ColumnStreamData csd = POOLS.csdPool.take();
    try {
        csd.incRef();
        DiskRangeList drl = toRead.next;
        while (drl != null) {
            drl = readEncodedStream(baseOffset, drl, drl.getOffset(), drl.getEnd(), csd, drl.getOffset(), drl.getEnd(), toRelease);
            for (MemoryBuffer buf : csd.getCacheBuffers()) {
                cacheWrapper.releaseBuffer(buf);
            }
            if (drl != null)
                drl = drl.next;
        }
    } finally {
        if (toRead != null) {
            releaseInitialRefcounts(toRead.next);
        }
        if (toRelease != null) {
            releaseBuffers(toRelease.keySet(), true);
            toRelease.clear();
        }
        if (csd != null) {
            csd.decRef();
            POOLS.csdPool.offer(csd);
        }
    }
}
Also used: MutateHelper (org.apache.hadoop.hive.common.io.DiskRangeList.MutateHelper), MemoryBuffer (org.apache.hadoop.hive.common.io.encoded.MemoryBuffer), DiskRangeList (org.apache.hadoop.hive.common.io.DiskRangeList), IdentityHashMap (java.util.IdentityHashMap), AtomicBoolean (java.util.concurrent.atomic.AtomicBoolean), ByteBuffer (java.nio.ByteBuffer), ColumnStreamData (org.apache.hadoop.hive.common.io.encoded.EncodedColumnBatch.ColumnStreamData)

Example 3 with MutateHelper

Use of org.apache.hadoop.hive.common.io.DiskRangeList.MutateHelper in the Apache Hive project.

From the class OrcFileEstimateErrors, method getIncompleteCbs:

public DiskRangeList getIncompleteCbs(DiskRangeList ranges, long baseOffset, DiskRangeListFactory factory, BooleanRef gotAllData) {
    DiskRangeList prev = ranges.prev;
    if (prev == null) {
        prev = new MutateHelper(ranges);
    }
    DiskRangeList current = ranges;
    // Assume by default that we would find everything.
    gotAllData.value = true;
    while (current != null) {
        // We assume ranges in "ranges" are non-overlapping; thus, we will save next in advance.
        DiskRangeList check = current;
        current = current.next;
        if (check.hasData())
            continue;
        Integer badLength = cache.get(Long.valueOf(check.getOffset() + baseOffset));
        if (badLength == null || badLength < check.getLength()) {
            gotAllData.value = false;
            continue;
        }
        // We could just remove here and handle the missing tail during read, but that can be
        // dangerous; let's explicitly add an incomplete CB.
        check.replaceSelfWith(new IncompleteCb(check.getOffset(), check.getEnd()));
    }
    return prev.next;
}
Also used: MutateHelper (org.apache.hadoop.hive.common.io.DiskRangeList.MutateHelper), DiskRangeList (org.apache.hadoop.hive.common.io.DiskRangeList), IncompleteCb (org.apache.hadoop.hive.ql.io.orc.encoded.IncompleteCb)
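
getIncompleteCbs illustrates two details of the same list-mutation discipline: the loop captures current.next before touching the node, because replaceSelfWith unlinks the node it is called on, and an unreadable range is replaced with an explicit incomplete marker (IncompleteCb) rather than silently dropped. A rough sketch of that loop shape, reusing the hypothetical Range class from the sketch above (again, not the real DiskRangeList API):

// Hypothetical marker type, mirroring the role of IncompleteCb.
class IncompleteMarker extends Range {
    IncompleteMarker(long offset, long end) {
        super(offset, end);
    }
}

class IncompleteScanSketch {
    // Replace known-bad ranges with explicit markers; advance the cursor
    // before mutating, since replaceSelfWith() unlinks the current node.
    static void markIncomplete(Range head, java.util.Set<Long> knownBadOffsets) {
        Range current = head;
        while (current != null) {
            Range check = current;
            current = current.next;   // save next first; 'check' may be spliced out below
            if (knownBadOffsets.contains(check.offset)) {
                check.replaceSelfWith(new IncompleteMarker(check.offset, check.end));
            }
        }
    }
}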

Example 4 with MutateHelper

Use of org.apache.hadoop.hive.common.io.DiskRangeList.MutateHelper in the Apache Hive project.

From the class EncodedReaderImpl, method readEncodedColumns:

@Override
public void readEncodedColumns(int stripeIx, StripeInformation stripe, OrcProto.RowIndex[] indexes, List<OrcProto.ColumnEncoding> encodings, List<OrcProto.Stream> streamList, boolean[] physicalFileIncludes, boolean[] rgs, Consumer<OrcEncodedColumnBatch> consumer) throws IOException {
    // Note: for now we don't have to setError here, caller will setError if we throw.
    // We are also not supposed to call setDone, since we are only part of the operation.
    long stripeOffset = stripe.getOffset();
    // 1. Figure out what we have to read.
    // Stream offset in relation to the stripe.
    long offset = 0;
    // 1.1. Figure out which columns have a present stream
    boolean[] hasNull = findPresentStreamsByColumn(streamList, types);
    if (isTracingEnabled) {
        LOG.trace("The following columns have PRESENT streams: " + arrayToString(hasNull));
    }
    // We assume stream list is sorted by column and that non-data
    // streams do not interleave data streams for the same column.
    // 1.2. With that in mind, determine disk ranges to read/get from cache (not by stream).
    ColumnReadContext[] colCtxs = new ColumnReadContext[physicalFileIncludes.length];
    int colRgIx = -1;
    // Don't create context for the 0-s column.
    for (int i = 1; i < physicalFileIncludes.length; ++i) {
        if (!physicalFileIncludes[i])
            continue;
        ColumnEncoding enc = encodings.get(i);
        colCtxs[i] = new ColumnReadContext(i, enc, indexes[i], ++colRgIx);
        if (isTracingEnabled) {
            LOG.trace("Creating context: " + colCtxs[i].toString());
        }
        trace.logColumnRead(i, colRgIx, enc.getKind());
    }
    CreateHelper listToRead = new CreateHelper();
    boolean hasIndexOnlyCols = false, hasAnyNonData = false;
    for (OrcProto.Stream stream : streamList) {
        long length = stream.getLength();
        int colIx = stream.getColumn();
        OrcProto.Stream.Kind streamKind = stream.getKind();
        boolean isIndexCol = StreamName.getArea(streamKind) != StreamName.Area.DATA;
        hasAnyNonData = hasAnyNonData || isIndexCol;
        // We have a stream for included column, but in future it might have no data streams.
        // It's more like "has at least one column included that has an index stream".
        hasIndexOnlyCols = hasIndexOnlyCols || (isIndexCol && physicalFileIncludes[colIx]);
        if (!physicalFileIncludes[colIx] || isIndexCol) {
            if (isTracingEnabled) {
                LOG.trace("Skipping stream for column " + colIx + ": " + streamKind + " at " + offset + ", " + length);
            }
            trace.logSkipStream(colIx, streamKind, offset, length);
            offset += length;
            continue;
        }
        ColumnReadContext ctx = colCtxs[colIx];
        assert ctx != null;
        int indexIx = RecordReaderUtils.getIndexPosition(ctx.encoding.getKind(), fileSchema.findSubtype(colIx).getCategory(), streamKind, isCompressed, hasNull[colIx]);
        ctx.addStream(offset, stream, indexIx);
        if (isTracingEnabled) {
            LOG.trace("Adding stream for column " + colIx + ": " + streamKind + " at " + offset + ", " + length + ", index position " + indexIx);
        }
        if (rgs == null || RecordReaderUtils.isDictionary(streamKind, encodings.get(colIx))) {
            trace.logAddStream(colIx, streamKind, offset, length, indexIx, true);
            addEntireStreamToRanges(offset, length, listToRead, true);
            if (isTracingEnabled) {
                LOG.trace("Will read whole stream " + streamKind + "; added to " + listToRead.getTail());
            }
        } else {
            trace.logAddStream(colIx, streamKind, offset, length, indexIx, false);
            addRgFilteredStreamToRanges(stream, rgs, isCompressed, indexes[colIx], encodings.get(colIx), fileSchema.findSubtype(colIx).getCategory(), bufferSize, hasNull[colIx], offset, length, listToRead, true);
        }
        offset += length;
    }
    boolean hasFileId = this.fileKey != null;
    if (listToRead.get() == null) {
        // No data to read for this stripe. Check if we have some included index-only columns.
        // For example, count(1) would have the root column, that has no data stream, included.
        // It may also happen that we have a column included with no streams whatsoever. That
        // should only be possible if the file has no index streams.
        boolean hasAnyIncludes = false;
        if (!hasIndexOnlyCols) {
            for (int i = 0; i < physicalFileIncludes.length; ++i) {
                if (!physicalFileIncludes[i])
                    continue;
                hasAnyIncludes = true;
                break;
            }
        }
        boolean nonProjectionRead = hasIndexOnlyCols || (!hasAnyNonData && hasAnyIncludes);
        // We should probably just disable filtering for such cases if they exist.
        if (nonProjectionRead && (rgs == SargApplier.READ_ALL_RGS)) {
            OrcEncodedColumnBatch ecb = POOLS.ecbPool.take();
            ecb.init(fileKey, stripeIx, OrcEncodedColumnBatch.ALL_RGS, physicalFileIncludes.length);
            try {
                consumer.consumeData(ecb);
            } catch (InterruptedException e) {
                LOG.error("IO thread interrupted while queueing data");
                throw new IOException(e);
            }
        } else {
            LOG.warn("Nothing to read for stripe [" + stripe + "]");
        }
        return;
    }
    // 2. Now, read all of the ranges from cache or disk.
    IdentityHashMap<ByteBuffer, Boolean> toRelease = new IdentityHashMap<>();
    MutateHelper toRead = getDataFromCacheAndDisk(listToRead.get(), stripeOffset, hasFileId, toRelease);
    // 3. For uncompressed case, we need some special processing before read.
    // Basically, we are trying to create artificial, consistent ranges to cache, as there are
    // no CBs in an uncompressed file. At the end of this processing, the list would contain
    // either cache buffers, or buffers allocated by us and not cached (if we are only reading
    // parts of the data for some ranges and don't want to cache it). Both are represented by
    // CacheChunks, so the list is just CacheChunk-s from that point on.
    DiskRangeList iter = preReadUncompressedStreams(stripeOffset, colCtxs, toRead, toRelease);
    // 4. Finally, decompress data, map per RG, and return to caller.
    // We go by RG and not by column because that is how data is processed.
    boolean hasError = true;
    try {
        int rgCount = rowIndexStride == 0 ? 1 : (int) Math.ceil((double) stripe.getNumberOfRows() / rowIndexStride);
        for (int rgIx = 0; rgIx < rgCount; ++rgIx) {
            if (rgs != null && !rgs[rgIx]) {
                // RG filtered.
                continue;
            }
            boolean isLastRg = rgIx == rgCount - 1;
            // Create the batch we will use to return data for this RG.
            OrcEncodedColumnBatch ecb = POOLS.ecbPool.take();
            trace.logStartRg(rgIx);
            boolean hasErrorForEcb = true;
            try {
                ecb.init(fileKey, stripeIx, rgIx, physicalFileIncludes.length);
                for (int colIx = 0; colIx < colCtxs.length; ++colIx) {
                    ColumnReadContext ctx = colCtxs[colIx];
                    // This column is not included
                    if (ctx == null)
                        continue;
                    OrcProto.RowIndexEntry index;
                    OrcProto.RowIndexEntry nextIndex;
                    // index is disabled
                    if (ctx.rowIndex == null) {
                        if (isTracingEnabled) {
                            LOG.trace("Row index is null. Likely reading a file with indexes disabled.");
                        }
                        index = null;
                        nextIndex = null;
                    } else {
                        index = ctx.rowIndex.getEntry(rgIx);
                        nextIndex = isLastRg ? null : ctx.rowIndex.getEntry(rgIx + 1);
                    }
                    if (isTracingEnabled) {
                        LOG.trace("ctx: {} rgIx: {} isLastRg: {} rgCount: {}", ctx, rgIx, isLastRg, rgCount);
                    }
                    ecb.initOrcColumn(ctx.colIx);
                    trace.logStartCol(ctx.colIx);
                    for (int streamIx = 0; streamIx < ctx.streamCount; ++streamIx) {
                        StreamContext sctx = ctx.streams[streamIx];
                        ColumnStreamData cb = null;
                        try {
                            if (RecordReaderUtils.isDictionary(sctx.kind, ctx.encoding) || index == null) {
                                // This stream is for entire stripe and needed for every RG; uncompress once and reuse.
                                if (sctx.stripeLevelStream == null) {
                                    if (isTracingEnabled) {
                                        LOG.trace("Getting stripe-level stream [" + sctx.kind + ", " + ctx.encoding + "] for" + " column " + ctx.colIx + " RG " + rgIx + " at " + sctx.offset + ", " + sctx.length);
                                    }
                                    trace.logStartStripeStream(sctx.kind);
                                    sctx.stripeLevelStream = POOLS.csdPool.take();
                                    // We will be using this for each RG while also sending RGs to processing.
                                    // To avoid buffers being unlocked, run refcount one ahead; so each RG
                                    // processing will decref once, and the last one will unlock the buffers.
                                    sctx.stripeLevelStream.incRef();
                                    // For stripe-level streams we don't need the extra refcount on the block.
                                    // See class comment about refcounts.
                                    long unlockUntilCOffset = sctx.offset + sctx.length;
                                    DiskRangeList lastCached = readEncodedStream(stripeOffset, iter, sctx.offset, sctx.offset + sctx.length, sctx.stripeLevelStream, unlockUntilCOffset, sctx.offset, toRelease);
                                    if (lastCached != null) {
                                        iter = lastCached;
                                    }
                                }
                                sctx.stripeLevelStream.incRef();
                                cb = sctx.stripeLevelStream;
                            } else {
                                // This stream can be separated by RG using index. Let's do that.
                                // Offset to where this RG begins.
                                long cOffset = sctx.offset + index.getPositions(sctx.streamIndexOffset);
                                // Offset relative to the beginning of the stream of where this RG ends.
                                long nextCOffsetRel = isLastRg ? sctx.length : nextIndex.getPositions(sctx.streamIndexOffset);
                                // Offset before which this RG is guaranteed to end. Can only be estimated.
                                // We estimate the same way for compressed and uncompressed for now.
                                long endCOffset = sctx.offset + estimateRgEndOffset(isCompressed, isLastRg, nextCOffsetRel, sctx.length, bufferSize);
                                // As we read, we can unlock initial refcounts for the buffers that end before
                                // the data that we need for this RG.
                                long unlockUntilCOffset = sctx.offset + nextCOffsetRel;
                                cb = createRgColumnStreamData(rgIx, isLastRg, ctx.colIx, sctx, cOffset, endCOffset, isCompressed, unlockUntilCOffset);
                                boolean isStartOfStream = sctx.bufferIter == null;
                                DiskRangeList lastCached = readEncodedStream(stripeOffset, (isStartOfStream ? iter : sctx.bufferIter), cOffset, endCOffset, cb, unlockUntilCOffset, sctx.offset, toRelease);
                                if (lastCached != null) {
                                    sctx.bufferIter = iter = lastCached;
                                }
                            }
                        } catch (Exception ex) {
                            DiskRangeList drl = toRead == null ? null : toRead.next;
                            LOG.error("Error getting stream [" + sctx.kind + ", " + ctx.encoding + "] for" + " column " + ctx.colIx + " RG " + rgIx + " at " + sctx.offset + ", " + sctx.length + "; toRead " + RecordReaderUtils.stringifyDiskRanges(drl), ex);
                            throw (ex instanceof IOException) ? (IOException) ex : new IOException(ex);
                        } finally {
                            // Otherwise, we won't release consumer refcounts for a partially read stream.
                            if (cb != null) {
                                ecb.setStreamData(ctx.colIx, sctx.kind.getNumber(), cb);
                            }
                        }
                    }
                }
                hasErrorForEcb = false;
            } finally {
                if (hasErrorForEcb) {
                    releaseEcbRefCountsOnError(ecb);
                }
            }
            try {
                consumer.consumeData(ecb);
            // After this, the non-initial refcounts are the responsibility of the consumer.
            } catch (InterruptedException e) {
                LOG.error("IO thread interrupted while queueing data");
                releaseEcbRefCountsOnError(ecb);
                throw new IOException(e);
            }
        }
        if (isTracingEnabled) {
            LOG.trace("Disk ranges after preparing all the data " + RecordReaderUtils.stringifyDiskRanges(toRead.next));
        }
        trace.logRanges(fileKey, stripeOffset, toRead.next, RangesSrc.PREREAD);
        hasError = false;
    } finally {
        try {
            // Release the unreleased stripe-level buffers. See class comment about refcounts.
            for (int colIx = 0; colIx < colCtxs.length; ++colIx) {
                ColumnReadContext ctx = colCtxs[colIx];
                // This column is not included.
                if (ctx == null)
                    continue;
                for (int streamIx = 0; streamIx < ctx.streamCount; ++streamIx) {
                    StreamContext sctx = ctx.streams[streamIx];
                    if (sctx == null || sctx.stripeLevelStream == null)
                        continue;
                    if (0 != sctx.stripeLevelStream.decRef())
                        continue;
                    // essentially the "consumer" refcount being released here.
                    for (MemoryBuffer buf : sctx.stripeLevelStream.getCacheBuffers()) {
                        LOG.trace("Unlocking {} at the end of processing", buf);
                        cacheWrapper.releaseBuffer(buf);
                    }
                }
            }
            releaseInitialRefcounts(toRead.next);
            // Release buffers as we are done with all the streams... also see toRelease comment.
            releaseBuffers(toRelease.keySet(), true);
        } catch (Throwable t) {
            if (!hasError)
                throw new IOException(t);
            LOG.error("Error during the cleanup after another error; ignoring", t);
        }
    }
}
Also used: DiskRangeList (org.apache.hadoop.hive.common.io.DiskRangeList), OrcProto (org.apache.orc.OrcProto), IdentityHashMap (java.util.IdentityHashMap), CreateHelper (org.apache.hadoop.hive.common.io.DiskRangeList.CreateHelper), Stream (org.apache.orc.OrcProto.Stream), MutateHelper (org.apache.hadoop.hive.common.io.DiskRangeList.MutateHelper), Kind (org.apache.orc.OrcProto.Stream.Kind), OutStream (org.apache.orc.impl.OutStream), InStream (org.apache.orc.impl.InStream), CodedInputStream (com.google.protobuf.CodedInputStream), InputStream (java.io.InputStream), AtomicBoolean (java.util.concurrent.atomic.AtomicBoolean), ColumnStreamData (org.apache.hadoop.hive.common.io.encoded.EncodedColumnBatch.ColumnStreamData), IOException (java.io.IOException), ByteBuffer (java.nio.ByteBuffer), ColumnEncoding (org.apache.orc.OrcProto.ColumnEncoding), OrcEncodedColumnBatch (org.apache.hadoop.hive.ql.io.orc.encoded.Reader.OrcEncodedColumnBatch), MemoryBuffer (org.apache.hadoop.hive.common.io.encoded.MemoryBuffer)
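
A subtle point in readEncodedColumns is the stripe-level stream refcount that is run "one ahead": the stream gets one extra incRef when it is created, each row group takes and later releases its own reference, and only the final decRef in the cleanup block actually unlocks the cache buffers. A tiny, self-contained sketch of that counting trick with a hypothetical RefCountedSketch wrapper (not the real ColumnStreamData API):

import java.util.concurrent.atomic.AtomicInteger;

// Hypothetical refcounted wrapper; only the counting behavior matters here.
class RefCountedSketch {
    private final AtomicInteger refs = new AtomicInteger(0);

    void incRef() { refs.incrementAndGet(); }

    int decRef() { return refs.decrementAndGet(); }   // returns the remaining count

    public static void main(String[] args) {
        RefCountedSketch stripeStream = new RefCountedSketch();
        int rowGroups = 3;

        stripeStream.incRef();                  // "one ahead": the owner's reference
        for (int rg = 0; rg < rowGroups; rg++) {
            stripeStream.incRef();              // one reference handed to each row group
        }

        for (int rg = 0; rg < rowGroups; rg++) {
            // Each row group releases its reference when its batch is consumed.
            System.out.println("after RG " + rg + ": " + stripeStream.decRef() + " refs left");
        }

        // The owner's release comes last; only now would the buffers be unlocked.
        if (stripeStream.decRef() == 0) {
            System.out.println("last reference dropped: unlock the cache buffers");
        }
    }
}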

Example 5 with MutateHelper

Use of org.apache.hadoop.hive.common.io.DiskRangeList.MutateHelper in the Apache Hive project.

From the class EncodedReaderImpl, method readIndexStreams:

@Override
public void readIndexStreams(OrcIndex index, StripeInformation stripe, List<OrcProto.Stream> streams, boolean[] physicalFileIncludes, boolean[] sargColumns) throws IOException {
    long stripeOffset = stripe.getOffset();
    DiskRangeList indexRanges = planIndexReading(fileSchema, streams, true, physicalFileIncludes, sargColumns, version, index.getBloomFilterKinds());
    if (indexRanges == null) {
        LOG.debug("Nothing to read for stripe [{}]", stripe);
        return;
    }
    ReadContext[] colCtxs = new ReadContext[physicalFileIncludes.length];
    int colRgIx = -1;
    for (int i = 0; i < physicalFileIncludes.length; ++i) {
        if (!physicalFileIncludes[i] && (sargColumns == null || !sargColumns[i]))
            continue;
        colCtxs[i] = new ReadContext(i, ++colRgIx);
        if (isTracingEnabled) {
            LOG.trace("Creating context: " + colCtxs[i].toString());
        }
        // Bogus encoding.
        trace.logColumnRead(i, colRgIx, ColumnEncoding.Kind.DIRECT);
    }
    long offset = 0;
    for (OrcProto.Stream stream : streams) {
        long length = stream.getLength();
        int colIx = stream.getColumn();
        OrcProto.Stream.Kind streamKind = stream.getKind();
        // See planIndexReading - only read non-row-index streams if involved in SARGs.
        if ((StreamName.getArea(streamKind) == StreamName.Area.INDEX) && ((sargColumns != null && sargColumns[colIx]) || (physicalFileIncludes[colIx] && streamKind == Kind.ROW_INDEX))) {
            trace.logAddStream(colIx, streamKind, offset, length, -1, true);
            colCtxs[colIx].addStream(offset, stream, -1);
            if (isTracingEnabled) {
                LOG.trace("Adding stream for column " + colIx + ": " + streamKind + " at " + offset + ", " + length);
            }
        }
        offset += length;
    }
    boolean hasFileId = this.fileKey != null;
    // 2. Now, read all of the ranges from cache or disk.
    IdentityHashMap<ByteBuffer, Boolean> toRelease = new IdentityHashMap<>();
    MutateHelper toRead = getDataFromCacheAndDisk(indexRanges, stripeOffset, hasFileId, toRelease);
    // 3. For uncompressed case, we need some special processing before read.
    DiskRangeList iter = preReadUncompressedStreams(stripeOffset, colCtxs, toRead, toRelease);
    // 4. Decompress the data.
    boolean hasError = true;
    try {
        for (int colIx = 0; colIx < colCtxs.length; ++colIx) {
            ReadContext ctx = colCtxs[colIx];
            // This column is not included.
            if (ctx == null)
                continue;
            for (int streamIx = 0; streamIx < ctx.streamCount; ++streamIx) {
                StreamContext sctx = ctx.streams[streamIx];
                try {
                    if (isTracingEnabled) {
                        LOG.trace("Getting index stream " + sctx.kind + " for column " + ctx.colIx + " at " + sctx.offset + ", " + sctx.length);
                    }
                    ColumnStreamData csd = POOLS.csdPool.take();
                    long endCOffset = sctx.offset + sctx.length;
                    DiskRangeList lastCached = readEncodedStream(stripeOffset, iter, sctx.offset, endCOffset, csd, endCOffset, sctx.offset, toRelease);
                    if (lastCached != null) {
                        iter = lastCached;
                    }
                    if (isTracingEnabled) {
                        traceLogBuffersUsedToParse(csd);
                    }
                    CodedInputStream cis = CodedInputStream.newInstance(new IndexStream(csd.getCacheBuffers(), sctx.length));
                    cis.setSizeLimit(InStream.PROTOBUF_MESSAGE_MAX_LIMIT);
                    switch(sctx.kind) {
                        case ROW_INDEX:
                            OrcProto.RowIndex tmp = index.getRowGroupIndex()[colIx] = OrcProto.RowIndex.parseFrom(cis);
                            if (isTracingEnabled) {
                                LOG.trace("Index is " + tmp.toString().replace('\n', ' '));
                            }
                            break;
                        case BLOOM_FILTER:
                        case BLOOM_FILTER_UTF8:
                            index.getBloomFilterIndex()[colIx] = OrcProto.BloomFilterIndex.parseFrom(cis);
                            break;
                        default:
                            throw new AssertionError("Unexpected index stream type " + sctx.kind);
                    }
                    // We are done with the buffers; unlike data blocks, we are also the consumer. Release.
                    for (MemoryBuffer buf : csd.getCacheBuffers()) {
                        if (buf == null)
                            continue;
                        cacheWrapper.releaseBuffer(buf);
                    }
                } catch (Exception ex) {
                    DiskRangeList drl = toRead == null ? null : toRead.next;
                    LOG.error("Error getting stream " + sctx.kind + " for column " + ctx.colIx + " at " + sctx.offset + ", " + sctx.length + "; toRead " + RecordReaderUtils.stringifyDiskRanges(drl), ex);
                    throw (ex instanceof IOException) ? (IOException) ex : new IOException(ex);
                }
            }
        }
        if (isTracingEnabled) {
            LOG.trace("Disk ranges after preparing all the data " + RecordReaderUtils.stringifyDiskRanges(toRead.next));
        }
        hasError = false;
    } finally {
        // Release the unreleased buffers. See class comment about refcounts.
        try {
            if (toRead != null) {
                releaseInitialRefcounts(toRead.next);
            }
            releaseBuffers(toRelease.keySet(), true);
        } catch (Throwable t) {
            if (!hasError)
                throw new IOException(t);
            LOG.error("Error during the cleanup after another error; ignoring", t);
        }
    }
}
Also used: DiskRangeList (org.apache.hadoop.hive.common.io.DiskRangeList), CodedInputStream (com.google.protobuf.CodedInputStream), OrcProto (org.apache.orc.OrcProto), IdentityHashMap (java.util.IdentityHashMap), Stream (org.apache.orc.OrcProto.Stream), MutateHelper (org.apache.hadoop.hive.common.io.DiskRangeList.MutateHelper), Kind (org.apache.orc.OrcProto.Stream.Kind), OutStream (org.apache.orc.impl.OutStream), InStream (org.apache.orc.impl.InStream), InputStream (java.io.InputStream), AtomicBoolean (java.util.concurrent.atomic.AtomicBoolean), ColumnStreamData (org.apache.hadoop.hive.common.io.encoded.EncodedColumnBatch.ColumnStreamData), IOException (java.io.IOException), ByteBuffer (java.nio.ByteBuffer), MemoryBuffer (org.apache.hadoop.hive.common.io.encoded.MemoryBuffer)

Aggregations

DiskRangeList (org.apache.hadoop.hive.common.io.DiskRangeList): 6
MutateHelper (org.apache.hadoop.hive.common.io.DiskRangeList.MutateHelper): 6
ByteBuffer (java.nio.ByteBuffer): 3
IdentityHashMap (java.util.IdentityHashMap): 3
AtomicBoolean (java.util.concurrent.atomic.AtomicBoolean): 3
ColumnStreamData (org.apache.hadoop.hive.common.io.encoded.EncodedColumnBatch.ColumnStreamData): 3
MemoryBuffer (org.apache.hadoop.hive.common.io.encoded.MemoryBuffer): 3
CodedInputStream (com.google.protobuf.CodedInputStream): 2
IOException (java.io.IOException): 2
InputStream (java.io.InputStream): 2
IncompleteCb (org.apache.hadoop.hive.ql.io.orc.encoded.IncompleteCb): 2
OrcProto (org.apache.orc.OrcProto): 2
Stream (org.apache.orc.OrcProto.Stream): 2
Kind (org.apache.orc.OrcProto.Stream.Kind): 2
InStream (org.apache.orc.impl.InStream): 2
OutStream (org.apache.orc.impl.OutStream): 2
Map (java.util.Map): 1
ConcurrentHashMap (java.util.concurrent.ConcurrentHashMap): 1
ConcurrentSkipListMap (java.util.concurrent.ConcurrentSkipListMap): 1
CreateHelper (org.apache.hadoop.hive.common.io.DiskRangeList.CreateHelper): 1