
Example 1 with OrcEncodedColumnBatch

Use of org.apache.hadoop.hive.ql.io.orc.encoded.Reader.OrcEncodedColumnBatch in project hive by apache.

In the class EncodedReaderImpl, the method readEncodedColumns:

@Override
public void readEncodedColumns(int stripeIx, StripeInformation stripe, OrcProto.RowIndex[] indexes, List<OrcProto.ColumnEncoding> encodings, List<OrcProto.Stream> streamList, boolean[] included, boolean[][] colRgs, Consumer<OrcEncodedColumnBatch> consumer) throws IOException {
    // Note: for now we don't have to setError here, caller will setError if we throw.
    // We are also not supposed to call setDone, since we are only part of the operation.
    long stripeOffset = stripe.getOffset();
    // 1. Figure out what we have to read.
    // Stream offset in relation to the stripe.
    long offset = 0;
    // 1.1. Figure out which columns have a present stream
    boolean[] hasNull = RecordReaderUtils.findPresentStreamsByColumn(streamList, types);
    if (isTracingEnabled) {
        LOG.trace("The following columns have PRESENT streams: " + arrayToString(hasNull));
    }
    // We assume stream list is sorted by column and that non-data
    // streams do not interleave data streams for the same column.
    // 1.2. With that in mind, determine disk ranges to read/get from cache (not by stream).
    ColumnReadContext[] colCtxs = new ColumnReadContext[included.length];
    int colRgIx = -1;
    // Don't create context for the 0th column.
    for (int i = 1; i < included.length; ++i) {
        if (!included[i])
            continue;
        colCtxs[i] = new ColumnReadContext(i, encodings.get(i), indexes[i], ++colRgIx);
        if (isTracingEnabled) {
            LOG.trace("Creating context: " + colCtxs[i].toString());
        }
    }
    boolean isCompressed = (codec != null);
    CreateHelper listToRead = new CreateHelper();
    boolean hasIndexOnlyCols = false;
    // Will always be the same for all cols at the moment.
    boolean[] includedRgs = null;
    for (OrcProto.Stream stream : streamList) {
        long length = stream.getLength();
        int colIx = stream.getColumn();
        OrcProto.Stream.Kind streamKind = stream.getKind();
        if (!included[colIx] || StreamName.getArea(streamKind) != StreamName.Area.DATA) {
            // We have a stream for included column, but in future it might have no data streams.
            // It's more like "has at least one column included that has an index stream".
            hasIndexOnlyCols = hasIndexOnlyCols || included[colIx];
            if (isTracingEnabled) {
                LOG.trace("Skipping stream for column " + colIx + ": " + streamKind + " at " + offset + ", " + length);
            }
            offset += length;
            continue;
        }
        ColumnReadContext ctx = colCtxs[colIx];
        assert ctx != null;
        includedRgs = colRgs[ctx.includedIx];
        int indexIx = RecordReaderUtils.getIndexPosition(ctx.encoding.getKind(), types.get(colIx).getKind(), streamKind, isCompressed, hasNull[colIx]);
        ctx.addStream(offset, stream, indexIx);
        if (isTracingEnabled) {
            LOG.trace("Adding stream for column " + colIx + ": " + streamKind + " at " + offset + ", " + length + ", index position " + indexIx);
        }
        if (includedRgs == null || RecordReaderUtils.isDictionary(streamKind, encodings.get(colIx))) {
            RecordReaderUtils.addEntireStreamToRanges(offset, length, listToRead, true);
            if (isTracingEnabled) {
                LOG.trace("Will read whole stream " + streamKind + "; added to " + listToRead.getTail());
            }
        } else {
            RecordReaderUtils.addRgFilteredStreamToRanges(stream, includedRgs, codec != null, indexes[colIx], encodings.get(colIx), types.get(colIx), bufferSize, hasNull[colIx], offset, length, listToRead, true);
        }
        offset += length;
    }
    boolean hasFileId = this.fileKey != null;
    if (listToRead.get() == null) {
        // TODO: there may be a bug here. Could there be partial RG filtering on index-only column?
        if (hasIndexOnlyCols && (includedRgs == null)) {
            OrcEncodedColumnBatch ecb = POOLS.ecbPool.take();
            ecb.init(fileKey, stripeIx, OrcEncodedColumnBatch.ALL_RGS, included.length);
            consumer.consumeData(ecb);
        } else {
            LOG.warn("Nothing to read for stripe [" + stripe + "]");
        }
        return;
    }
    // 2. Now, read all of the ranges from cache or disk.
    DiskRangeList.MutateHelper toRead = new DiskRangeList.MutateHelper(listToRead.get());
    if (/*isTracingEnabled && */ LOG.isInfoEnabled()) {
        LOG.info("Resulting disk ranges to read (file " + fileKey + "): " + RecordReaderUtils.stringifyDiskRanges(toRead.next));
    }
    BooleanRef isAllInCache = new BooleanRef();
    if (hasFileId) {
        cacheWrapper.getFileData(fileKey, toRead.next, stripeOffset, CC_FACTORY, isAllInCache);
        if (/*isTracingEnabled && */ LOG.isInfoEnabled()) {
            LOG.info("Disk ranges after cache (found everything " + isAllInCache.value + "; file " + fileKey + ", base offset " + stripeOffset + "): " + RecordReaderUtils.stringifyDiskRanges(toRead.next));
        }
    }
    if (!isAllInCache.value) {
        if (!isDataReaderOpen) {
            this.dataReader.open();
            isDataReaderOpen = true;
        }
        dataReader.readFileData(toRead.next, stripeOffset, cacheWrapper.getAllocator().isDirectAlloc());
    }
    // 3. For uncompressed case, we need some special processing before read.
    // Keep "toRead" list for future use, don't extract().
    DiskRangeList iter = toRead.next;
    if (codec == null) {
        for (int colIx = 0; colIx < colCtxs.length; ++colIx) {
            ColumnReadContext ctx = colCtxs[colIx];
            // This column is not included.
            if (ctx == null)
                continue;
            for (int streamIx = 0; streamIx < ctx.streamCount; ++streamIx) {
                StreamContext sctx = ctx.streams[streamIx];
                DiskRangeList newIter = preReadUncompressedStream(stripeOffset, iter, sctx.offset, sctx.offset + sctx.length);
                if (newIter != null) {
                    iter = newIter;
                }
            }
        }
        if (isTracingEnabled) {
            LOG.trace("Disk ranges after pre-read (file " + fileKey + ", base offset " + stripeOffset + "): " + RecordReaderUtils.stringifyDiskRanges(toRead.next));
        }
        // Reset the iter to start.
        iter = toRead.next;
    }
    // 4. Finally, decompress data, map per RG, and return to caller.
    // We go by RG and not by column because that is how data is processed.
    int rgCount = (int) Math.ceil((double) stripe.getNumberOfRows() / rowIndexStride);
    for (int rgIx = 0; rgIx < rgCount; ++rgIx) {
        boolean isLastRg = rgIx == rgCount - 1;
        // Create the batch we will use to return data for this RG.
        OrcEncodedColumnBatch ecb = POOLS.ecbPool.take();
        ecb.init(fileKey, stripeIx, rgIx, included.length);
        boolean isRGSelected = true;
        for (int colIx = 0; colIx < colCtxs.length; ++colIx) {
            ColumnReadContext ctx = colCtxs[colIx];
            // This column is not included.
            if (ctx == null)
                continue;
            if (isTracingEnabled) {
                LOG.trace("ctx: {} rgIx: {} isLastRg: {} rgCount: {}", ctx, rgIx, isLastRg, rgCount);
            }
            // TODO: simplify this now that high-level cache has been removed. Same RGs for all cols.
            if (colRgs[ctx.includedIx] != null && !colRgs[ctx.includedIx][rgIx]) {
                // RG x col filtered.
                isRGSelected = false;
                if (isTracingEnabled) {
                    LOG.trace("colIxMod: {} rgIx: {} colRgs[{}]: {} colRgs[{}][{}]: {}", ctx.includedIx, rgIx, ctx.includedIx, Arrays.toString(colRgs[ctx.includedIx]), ctx.includedIx, rgIx, colRgs[ctx.includedIx][rgIx]);
                }
                continue;
            }
            OrcProto.RowIndexEntry index = ctx.rowIndex.getEntry(rgIx), nextIndex = isLastRg ? null : ctx.rowIndex.getEntry(rgIx + 1);
            ecb.initOrcColumn(ctx.colIx);
            for (int streamIx = 0; streamIx < ctx.streamCount; ++streamIx) {
                StreamContext sctx = ctx.streams[streamIx];
                ColumnStreamData cb = null;
                try {
                    if (RecordReaderUtils.isDictionary(sctx.kind, ctx.encoding)) {
                        // This stream is for entire stripe and needed for every RG; uncompress once and reuse.
                        if (isTracingEnabled) {
                            LOG.trace("Getting stripe-level stream [" + sctx.kind + ", " + ctx.encoding + "] for" + " column " + ctx.colIx + " RG " + rgIx + " at " + sctx.offset + ", " + sctx.length);
                        }
                        if (sctx.stripeLevelStream == null) {
                            sctx.stripeLevelStream = POOLS.csdPool.take();
                            // We will be using this for each RG while also sending RGs to processing.
                            // To avoid buffers being unlocked, run refcount one ahead; we will not increase
                            // it when building the last RG, so each RG processing will decref once, and the
                            // last one will unlock the buffers.
                            sctx.stripeLevelStream.incRef();
                            // For stripe-level streams we don't need the extra refcount on the block.
                            // See class comment about refcounts.
                            long unlockUntilCOffset = sctx.offset + sctx.length;
                            DiskRangeList lastCached = readEncodedStream(stripeOffset, iter, sctx.offset, sctx.offset + sctx.length, sctx.stripeLevelStream, unlockUntilCOffset, sctx.offset);
                            if (lastCached != null) {
                                iter = lastCached;
                            }
                        }
                        if (!isLastRg) {
                            sctx.stripeLevelStream.incRef();
                        }
                        cb = sctx.stripeLevelStream;
                    } else {
                        // This stream can be separated by RG using index. Let's do that.
                        // Offset to where this RG begins.
                        long cOffset = sctx.offset + index.getPositions(sctx.streamIndexOffset);
                        // Offset relative to the beginning of the stream of where this RG ends.
                        long nextCOffsetRel = isLastRg ? sctx.length : nextIndex.getPositions(sctx.streamIndexOffset);
                        // Offset before which this RG is guaranteed to end. Can only be estimated.
                        // We estimate the same way for compressed and uncompressed for now.
                        long endCOffset = sctx.offset + RecordReaderUtils.estimateRgEndOffset(isCompressed, isLastRg, nextCOffsetRel, sctx.length, bufferSize);
                        // As we read, we can unlock initial refcounts for the buffers that end before
                        // the data that we need for this RG.
                        long unlockUntilCOffset = sctx.offset + nextCOffsetRel;
                        cb = createRgColumnStreamData(rgIx, isLastRg, ctx.colIx, sctx, cOffset, endCOffset, isCompressed);
                        boolean isStartOfStream = sctx.bufferIter == null;
                        DiskRangeList lastCached = readEncodedStream(stripeOffset, (isStartOfStream ? iter : sctx.bufferIter), cOffset, endCOffset, cb, unlockUntilCOffset, sctx.offset);
                        if (lastCached != null) {
                            sctx.bufferIter = iter = lastCached;
                        }
                    }
                    ecb.setStreamData(ctx.colIx, sctx.kind.getNumber(), cb);
                } catch (Exception ex) {
                    DiskRangeList drl = toRead == null ? null : toRead.next;
                    LOG.error("Error getting stream [" + sctx.kind + ", " + ctx.encoding + "] for" + " column " + ctx.colIx + " RG " + rgIx + " at " + sctx.offset + ", " + sctx.length + "; toRead " + RecordReaderUtils.stringifyDiskRanges(drl), ex);
                    throw (ex instanceof IOException) ? (IOException) ex : new IOException(ex);
                }
            }
        }
        if (isRGSelected) {
            consumer.consumeData(ecb);
        }
    }
    if (isTracingEnabled) {
        LOG.trace("Disk ranges after preparing all the data " + RecordReaderUtils.stringifyDiskRanges(toRead.next));
    }
    // Release the unreleased buffers. See class comment about refcounts.
    releaseInitialRefcounts(toRead.next);
    releaseCacheChunksIntoObjectPool(toRead.next);
}
Also used : DiskRangeList(org.apache.hadoop.hive.common.io.DiskRangeList) OrcProto(org.apache.orc.OrcProto) BooleanRef(org.apache.hadoop.hive.common.io.DataCache.BooleanRef) IOException(java.io.IOException) IOException(java.io.IOException) CreateHelper(org.apache.hadoop.hive.common.io.DiskRangeList.CreateHelper) OrcEncodedColumnBatch(org.apache.hadoop.hive.ql.io.orc.encoded.Reader.OrcEncodedColumnBatch) OutStream(org.apache.orc.impl.OutStream) ColumnStreamData(org.apache.hadoop.hive.common.io.encoded.EncodedColumnBatch.ColumnStreamData)
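
The trickiest part of this method is the refcount discipline for stripe-level (dictionary) streams: the shared stream is created with its refcount run one ahead, every non-last row group adds one more reference, and each consumer releases exactly one, so the underlying buffers are unlocked only after the last row group has been processed. Below is a minimal, self-contained sketch of that pattern, based only on the comments in the code above; RefCountedStream and its println "unlock" are hypothetical stand-ins, not Hive classes.

import java.util.concurrent.atomic.AtomicInteger;

// Hypothetical stand-in for a shared, refcounted stripe-level stream (not a Hive class).
final class RefCountedStream {
    private final AtomicInteger refs = new AtomicInteger(0);

    void incRef() {
        refs.incrementAndGet();
    }

    // Decrement; when the count reaches zero the underlying buffers would be unlocked.
    int decRef() {
        int left = refs.decrementAndGet();
        if (left == 0) {
            System.out.println("all refs released, unlocking stripe-level buffers");
        }
        return left;
    }
}

public class StripeLevelRefCountSketch {
    public static void main(String[] args) {
        int rgCount = 4;
        RefCountedStream dictStream = null;
        for (int rgIx = 0; rgIx < rgCount; ++rgIx) {
            boolean isLastRg = rgIx == rgCount - 1;
            if (dictStream == null) {
                dictStream = new RefCountedStream();
                // Run the refcount one ahead, as the comment in readEncodedColumns describes.
                dictStream.incRef();
            }
            if (!isLastRg) {
                dictStream.incRef(); // one extra reference per non-last RG
            }
            // Each RG's consumer releases one reference when it is done with the batch;
            // only the decRef for the last RG drops the count to zero.
            dictStream.decRef();
        }
    }
}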

Example 2 with OrcEncodedColumnBatch

Use of org.apache.hadoop.hive.ql.io.orc.encoded.Reader.OrcEncodedColumnBatch in project hive by apache.

In the class EncodedReaderImpl, the method readEncodedColumns (a newer overload):

@Override
public void readEncodedColumns(int stripeIx, StripeInformation stripe, OrcProto.RowIndex[] indexes, List<OrcProto.ColumnEncoding> encodings, List<OrcProto.Stream> streamList, boolean[] physicalFileIncludes, boolean[] rgs, Consumer<OrcEncodedColumnBatch> consumer) throws IOException {
    // Note: for now we don't have to setError here, caller will setError if we throw.
    // We are also not supposed to call setDone, since we are only part of the operation.
    long stripeOffset = stripe.getOffset();
    // 1. Figure out what we have to read.
    // Stream offset in relation to the stripe.
    long offset = 0;
    // 1.1. Figure out which columns have a present stream
    boolean[] hasNull = findPresentStreamsByColumn(streamList, types);
    if (isTracingEnabled) {
        LOG.trace("The following columns have PRESENT streams: " + arrayToString(hasNull));
    }
    // We assume stream list is sorted by column and that non-data
    // streams do not interleave data streams for the same column.
    // 1.2. With that in mind, determine disk ranges to read/get from cache (not by stream).
    ColumnReadContext[] colCtxs = new ColumnReadContext[physicalFileIncludes.length];
    int colRgIx = -1;
    // Don't create context for the 0th column.
    for (int i = 1; i < physicalFileIncludes.length; ++i) {
        if (!physicalFileIncludes[i])
            continue;
        ColumnEncoding enc = encodings.get(i);
        colCtxs[i] = new ColumnReadContext(i, enc, indexes[i], ++colRgIx);
        if (isTracingEnabled) {
            LOG.trace("Creating context: " + colCtxs[i].toString());
        }
        trace.logColumnRead(i, colRgIx, enc.getKind());
    }
    CreateHelper listToRead = new CreateHelper();
    boolean hasIndexOnlyCols = false, hasAnyNonData = false;
    for (OrcProto.Stream stream : streamList) {
        long length = stream.getLength();
        int colIx = stream.getColumn();
        OrcProto.Stream.Kind streamKind = stream.getKind();
        boolean isIndexCol = StreamName.getArea(streamKind) != StreamName.Area.DATA;
        hasAnyNonData = hasAnyNonData || isIndexCol;
        // We have a stream for included column, but in future it might have no data streams.
        // It's more like "has at least one column included that has an index stream".
        hasIndexOnlyCols = hasIndexOnlyCols || (isIndexCol && physicalFileIncludes[colIx]);
        if (!physicalFileIncludes[colIx] || isIndexCol) {
            if (isTracingEnabled) {
                LOG.trace("Skipping stream for column " + colIx + ": " + streamKind + " at " + offset + ", " + length);
            }
            trace.logSkipStream(colIx, streamKind, offset, length);
            offset += length;
            continue;
        }
        ColumnReadContext ctx = colCtxs[colIx];
        assert ctx != null;
        int indexIx = RecordReaderUtils.getIndexPosition(ctx.encoding.getKind(), fileSchema.findSubtype(colIx).getCategory(), streamKind, isCompressed, hasNull[colIx]);
        ctx.addStream(offset, stream, indexIx);
        if (isTracingEnabled) {
            LOG.trace("Adding stream for column " + colIx + ": " + streamKind + " at " + offset + ", " + length + ", index position " + indexIx);
        }
        if (rgs == null || RecordReaderUtils.isDictionary(streamKind, encodings.get(colIx))) {
            trace.logAddStream(colIx, streamKind, offset, length, indexIx, true);
            addEntireStreamToRanges(offset, length, listToRead, true);
            if (isTracingEnabled) {
                LOG.trace("Will read whole stream " + streamKind + "; added to " + listToRead.getTail());
            }
        } else {
            trace.logAddStream(colIx, streamKind, offset, length, indexIx, false);
            addRgFilteredStreamToRanges(stream, rgs, isCompressed, indexes[colIx], encodings.get(colIx), fileSchema.findSubtype(colIx).getCategory(), bufferSize, hasNull[colIx], offset, length, listToRead, true);
        }
        offset += length;
    }
    boolean hasFileId = this.fileKey != null;
    if (listToRead.get() == null) {
        // No data to read for this stripe. Check if we have some included index-only columns.
        // For example, count(1) would have the root column, that has no data stream, included.
        // It may also happen that we have a column included with no streams whatsoever. That
        // should only be possible if the file has no index streams.
        boolean hasAnyIncludes = false;
        if (!hasIndexOnlyCols) {
            for (int i = 0; i < physicalFileIncludes.length; ++i) {
                if (!physicalFileIncludes[i])
                    continue;
                hasAnyIncludes = true;
                break;
            }
        }
        boolean nonProjectionRead = hasIndexOnlyCols || (!hasAnyNonData && hasAnyIncludes);
        // We should probably just disable filtering for such cases if they exist.
        if (nonProjectionRead && (rgs == SargApplier.READ_ALL_RGS)) {
            OrcEncodedColumnBatch ecb = POOLS.ecbPool.take();
            ecb.init(fileKey, stripeIx, OrcEncodedColumnBatch.ALL_RGS, physicalFileIncludes.length);
            try {
                consumer.consumeData(ecb);
            } catch (InterruptedException e) {
                LOG.error("IO thread interrupted while queueing data");
                throw new IOException(e);
            }
        } else {
            LOG.warn("Nothing to read for stripe [" + stripe + "]");
        }
        return;
    }
    // 2. Now, read all of the ranges from cache or disk.
    IdentityHashMap<ByteBuffer, Boolean> toRelease = new IdentityHashMap<>();
    MutateHelper toRead = getDataFromCacheAndDisk(listToRead.get(), stripeOffset, hasFileId, toRelease);
    // 3. For uncompressed case, we need some special processing before read.
    // Basically, we are trying to create artificial, consistent ranges to cache, as there are
    // no CBs in an uncompressed file. At the end of this processing, the list would contain
    // either cache buffers, or buffers allocated by us and not cached (if we are only reading
    // parts of the data for some ranges and don't want to cache it). Both are represented by
    // CacheChunks, so the list is just CacheChunk-s from that point on.
    DiskRangeList iter = preReadUncompressedStreams(stripeOffset, colCtxs, toRead, toRelease);
    // 4. Finally, decompress data, map per RG, and return to caller.
    // We go by RG and not by column because that is how data is processed.
    boolean hasError = true;
    try {
        int rgCount = rowIndexStride == 0 ? 1 : (int) Math.ceil((double) stripe.getNumberOfRows() / rowIndexStride);
        for (int rgIx = 0; rgIx < rgCount; ++rgIx) {
            if (rgs != null && !rgs[rgIx]) {
                // RG filtered.
                continue;
            }
            boolean isLastRg = rgIx == rgCount - 1;
            // Create the batch we will use to return data for this RG.
            OrcEncodedColumnBatch ecb = POOLS.ecbPool.take();
            trace.logStartRg(rgIx);
            boolean hasErrorForEcb = true;
            try {
                ecb.init(fileKey, stripeIx, rgIx, physicalFileIncludes.length);
                for (int colIx = 0; colIx < colCtxs.length; ++colIx) {
                    ColumnReadContext ctx = colCtxs[colIx];
                    // This column is not included
                    if (ctx == null)
                        continue;
                    OrcProto.RowIndexEntry index;
                    OrcProto.RowIndexEntry nextIndex;
                    // index is disabled
                    if (ctx.rowIndex == null) {
                        if (isTracingEnabled) {
                            LOG.trace("Row index is null. Likely reading a file with indexes disabled.");
                        }
                        index = null;
                        nextIndex = null;
                    } else {
                        index = ctx.rowIndex.getEntry(rgIx);
                        nextIndex = isLastRg ? null : ctx.rowIndex.getEntry(rgIx + 1);
                    }
                    if (isTracingEnabled) {
                        LOG.trace("ctx: {} rgIx: {} isLastRg: {} rgCount: {}", ctx, rgIx, isLastRg, rgCount);
                    }
                    ecb.initOrcColumn(ctx.colIx);
                    trace.logStartCol(ctx.colIx);
                    for (int streamIx = 0; streamIx < ctx.streamCount; ++streamIx) {
                        StreamContext sctx = ctx.streams[streamIx];
                        ColumnStreamData cb = null;
                        try {
                            if (RecordReaderUtils.isDictionary(sctx.kind, ctx.encoding) || index == null) {
                                // This stream is for entire stripe and needed for every RG; uncompress once and reuse.
                                if (sctx.stripeLevelStream == null) {
                                    if (isTracingEnabled) {
                                        LOG.trace("Getting stripe-level stream [" + sctx.kind + ", " + ctx.encoding + "] for" + " column " + ctx.colIx + " RG " + rgIx + " at " + sctx.offset + ", " + sctx.length);
                                    }
                                    trace.logStartStripeStream(sctx.kind);
                                    sctx.stripeLevelStream = POOLS.csdPool.take();
                                    // We will be using this for each RG while also sending RGs to processing.
                                    // To avoid buffers being unlocked, run refcount one ahead; so each RG
                                    // processing will decref once, and the last one will unlock the buffers.
                                    sctx.stripeLevelStream.incRef();
                                    // For stripe-level streams we don't need the extra refcount on the block.
                                    // See class comment about refcounts.
                                    long unlockUntilCOffset = sctx.offset + sctx.length;
                                    DiskRangeList lastCached = readEncodedStream(stripeOffset, iter, sctx.offset, sctx.offset + sctx.length, sctx.stripeLevelStream, unlockUntilCOffset, sctx.offset, toRelease);
                                    if (lastCached != null) {
                                        iter = lastCached;
                                    }
                                }
                                sctx.stripeLevelStream.incRef();
                                cb = sctx.stripeLevelStream;
                            } else {
                                // This stream can be separated by RG using index. Let's do that.
                                // Offset to where this RG begins.
                                long cOffset = sctx.offset + index.getPositions(sctx.streamIndexOffset);
                                // Offset relative to the beginning of the stream of where this RG ends.
                                long nextCOffsetRel = isLastRg ? sctx.length : nextIndex.getPositions(sctx.streamIndexOffset);
                                // Offset before which this RG is guaranteed to end. Can only be estimated.
                                // We estimate the same way for compressed and uncompressed for now.
                                long endCOffset = sctx.offset + estimateRgEndOffset(isCompressed, isLastRg, nextCOffsetRel, sctx.length, bufferSize);
                                // As we read, we can unlock initial refcounts for the buffers that end before
                                // the data that we need for this RG.
                                long unlockUntilCOffset = sctx.offset + nextCOffsetRel;
                                cb = createRgColumnStreamData(rgIx, isLastRg, ctx.colIx, sctx, cOffset, endCOffset, isCompressed, unlockUntilCOffset);
                                boolean isStartOfStream = sctx.bufferIter == null;
                                DiskRangeList lastCached = readEncodedStream(stripeOffset, (isStartOfStream ? iter : sctx.bufferIter), cOffset, endCOffset, cb, unlockUntilCOffset, sctx.offset, toRelease);
                                if (lastCached != null) {
                                    sctx.bufferIter = iter = lastCached;
                                }
                            }
                        } catch (Exception ex) {
                            DiskRangeList drl = toRead == null ? null : toRead.next;
                            LOG.error("Error getting stream [" + sctx.kind + ", " + ctx.encoding + "] for" + " column " + ctx.colIx + " RG " + rgIx + " at " + sctx.offset + ", " + sctx.length + "; toRead " + RecordReaderUtils.stringifyDiskRanges(drl), ex);
                            throw (ex instanceof IOException) ? (IOException) ex : new IOException(ex);
                        } finally {
                            // Otherwise, we won't release consumer refcounts for a partially read stream.
                            if (cb != null) {
                                ecb.setStreamData(ctx.colIx, sctx.kind.getNumber(), cb);
                            }
                        }
                    }
                }
                hasErrorForEcb = false;
            } finally {
                if (hasErrorForEcb) {
                    releaseEcbRefCountsOnError(ecb);
                }
            }
            try {
                consumer.consumeData(ecb);
            // After this, the non-initial refcounts are the responsibility of the consumer.
            } catch (InterruptedException e) {
                LOG.error("IO thread interrupted while queueing data");
                releaseEcbRefCountsOnError(ecb);
                throw new IOException(e);
            }
        }
        if (isTracingEnabled) {
            LOG.trace("Disk ranges after preparing all the data " + RecordReaderUtils.stringifyDiskRanges(toRead.next));
        }
        trace.logRanges(fileKey, stripeOffset, toRead.next, RangesSrc.PREREAD);
        hasError = false;
    } finally {
        try {
            // Release the unreleased stripe-level buffers. See class comment about refcounts.
            for (int colIx = 0; colIx < colCtxs.length; ++colIx) {
                ColumnReadContext ctx = colCtxs[colIx];
                // This column is not included.
                if (ctx == null)
                    continue;
                for (int streamIx = 0; streamIx < ctx.streamCount; ++streamIx) {
                    StreamContext sctx = ctx.streams[streamIx];
                    if (sctx == null || sctx.stripeLevelStream == null)
                        continue;
                    if (0 != sctx.stripeLevelStream.decRef())
                        continue;
                    // This is essentially the "consumer" refcount being released here.
                    for (MemoryBuffer buf : sctx.stripeLevelStream.getCacheBuffers()) {
                        LOG.trace("Unlocking {} at the end of processing", buf);
                        cacheWrapper.releaseBuffer(buf);
                    }
                }
            }
            releaseInitialRefcounts(toRead.next);
            // Release buffers as we are done with all the streams... also see toRelease comment.
            releaseBuffers(toRelease.keySet(), true);
        } catch (Throwable t) {
            if (!hasError)
                throw new IOException(t);
            LOG.error("Error during the cleanup after another error; ignoring", t);
        }
    }
}
Also used : DiskRangeList(org.apache.hadoop.hive.common.io.DiskRangeList) OrcProto(org.apache.orc.OrcProto) IdentityHashMap(java.util.IdentityHashMap) CreateHelper(org.apache.hadoop.hive.common.io.DiskRangeList.CreateHelper) Stream(org.apache.orc.OrcProto.Stream) MutateHelper(org.apache.hadoop.hive.common.io.DiskRangeList.MutateHelper) Kind(org.apache.orc.OrcProto.Stream.Kind) OutStream(org.apache.orc.impl.OutStream) Stream(org.apache.orc.OrcProto.Stream) InStream(org.apache.orc.impl.InStream) CodedInputStream(com.google.protobuf.CodedInputStream) InputStream(java.io.InputStream) AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) ColumnStreamData(org.apache.hadoop.hive.common.io.encoded.EncodedColumnBatch.ColumnStreamData) IOException(java.io.IOException) ByteBuffer(java.nio.ByteBuffer) IOException(java.io.IOException) ColumnEncoding(org.apache.orc.OrcProto.ColumnEncoding) OrcEncodedColumnBatch(org.apache.hadoop.hive.ql.io.orc.encoded.Reader.OrcEncodedColumnBatch) MemoryBuffer(org.apache.hadoop.hive.common.io.encoded.MemoryBuffer)
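
For intuition about how the row-group loop in this overload partitions a stripe: rgCount is ceil(numberOfRows / rowIndexStride), with a guard for files written with row indexes disabled (rowIndexStride == 0), and a single boolean[] rgs filter, shared by all columns, decides which groups actually produce an OrcEncodedColumnBatch. The following is a small self-contained sketch of just that arithmetic; the stripe size, stride, and filter values are made up for illustration.

public class RowGroupPartitionSketch {
    // Mirrors the rgCount computation in readEncodedColumns, including the stride-disabled guard.
    static int rowGroupCount(long numberOfRows, int rowIndexStride) {
        return rowIndexStride == 0 ? 1 : (int) Math.ceil((double) numberOfRows / rowIndexStride);
    }

    public static void main(String[] args) {
        long stripeRows = 25_000;   // hypothetical stripe row count
        int stride = 10_000;        // the common default ORC row index stride
        int rgCount = rowGroupCount(stripeRows, stride); // ceil(25000 / 10000) = 3

        // A SARG-style row-group filter: only the first and last row groups survive.
        boolean[] rgs = { true, false, true };

        for (int rgIx = 0; rgIx < rgCount; ++rgIx) {
            if (rgs != null && !rgs[rgIx]) {
                continue; // RG filtered out, no batch is produced for it
            }
            boolean isLastRg = rgIx == rgCount - 1;
            System.out.println("would build a batch for RG " + rgIx
                + (isLastRg ? " (last RG, may be shorter than the stride)" : ""));
        }
    }
}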

Example 3 with OrcEncodedColumnBatch

Use of org.apache.hadoop.hive.ql.io.orc.encoded.Reader.OrcEncodedColumnBatch in project hive by apache.

In the class SerDeEncodedDataReader, the method processOneSlice:

private boolean processOneSlice(CacheWriter.CacheStripeData diskData, boolean[] splitIncludes, int stripeIx, StripeData cacheData, long startTime) throws IOException, InterruptedException {
    logProcessOneSlice(stripeIx, diskData, cacheData);
    ColumnEncoding[] cacheEncodings = cacheData == null ? null : cacheData.getEncodings();
    LlapSerDeDataBuffer[][][] cacheBuffers = cacheData == null ? null : cacheData.getData();
    long cacheRowCount = cacheData == null ? -1L : cacheData.getRowCount();
    SerDeStripeMetadata metadata = new SerDeStripeMetadata(stripeIx);
    StripeData sliceToCache = null;
    boolean hasAllData = diskData == null;
    if (!hasAllData) {
        sliceToCache = createSliceToCache(diskData, cacheData);
        metadata.setEncodings(combineCacheAndWriterEncodings(cacheEncodings, diskData.encodings));
        metadata.setRowCount(diskData.rowCount);
    } else {
        metadata.setEncodings(Lists.newArrayList(cacheEncodings));
        metadata.setRowCount(cacheRowCount);
    }
    if (LlapIoImpl.LOG.isTraceEnabled()) {
        LlapIoImpl.LOG.trace("Derived stripe metadata for this split is " + metadata);
    }
    consumer.setStripeMetadata(metadata);
    OrcEncodedColumnBatch ecb = useObjectPools ? ECB_POOL.take() : new OrcEncodedColumnBatch();
    ecb.init(fileKey, metadata.getStripeIx(), OrcEncodedColumnBatch.ALL_RGS, writerIncludes.length);
    // Skip the 0th column that is the root structure.
    for (int colIx = 1; colIx < writerIncludes.length; ++colIx) {
        if (!writerIncludes[colIx])
            continue;
        ecb.initColumn(colIx, OrcEncodedColumnBatch.MAX_DATA_STREAMS);
        if (!hasAllData && splitIncludes[colIx]) {
            // The column has been read from disk.
            List<CacheWriter.CacheStreamData> streams = diskData.colStreams.get(colIx);
            LlapSerDeDataBuffer[][] newCacheDataForCol = createArrayToCache(sliceToCache, colIx, streams);
            // Struct column, such as root?
            if (streams == null)
                continue;
            Iterator<CacheWriter.CacheStreamData> iter = streams.iterator();
            while (iter.hasNext()) {
                CacheWriter.CacheStreamData stream = iter.next();
                if (stream.isSuppressed) {
                    if (LlapIoImpl.LOG.isTraceEnabled()) {
                        LlapIoImpl.LOG.trace("Removing a suppressed stream " + stream.name);
                    }
                    iter.remove();
                    discardUncachedBuffers(stream.data);
                    continue;
                }
                int streamIx = setStreamDataToCache(newCacheDataForCol, stream);
                ColumnStreamData cb = useObjectPools ? CSD_POOL.take() : new ColumnStreamData();
                cb.incRef();
                cb.setCacheBuffers(stream.data);
                ecb.setStreamData(colIx, streamIx, cb);
            }
        } else {
            processColumnCacheData(cacheBuffers, ecb, colIx);
        }
    }
    if (processStop()) {
        recordReaderTime(startTime);
        return false;
    }
    // For now, just rely on the cache put to lock them before we send them over.
    if (LlapIoImpl.CACHE_LOGGER.isTraceEnabled()) {
        LlapIoImpl.CACHE_LOGGER.trace("Data to cache from the read " + sliceToCache);
    }
    cacheFileData(sliceToCache);
    return sendEcbToConsumer(ecb, cacheData != null, diskData);
}
Also used : StripeData(org.apache.hadoop.hive.llap.cache.SerDeLowLevelCacheImpl.StripeData) SerDeStripeMetadata(org.apache.hadoop.hive.llap.io.decode.GenericColumnVectorProducer.SerDeStripeMetadata) ColumnEncoding(org.apache.orc.OrcProto.ColumnEncoding) OrcEncodedColumnBatch(org.apache.hadoop.hive.ql.io.orc.encoded.Reader.OrcEncodedColumnBatch) LlapSerDeDataBuffer(org.apache.hadoop.hive.llap.cache.SerDeLowLevelCacheImpl.LlapSerDeDataBuffer) ColumnStreamData(org.apache.hadoop.hive.common.io.encoded.EncodedColumnBatch.ColumnStreamData)
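
The branching in this method is driven by two include masks: writerIncludes marks the columns the batch must carry at all, and splitIncludes marks which of those were actually read from disk for this slice, with the remainder served from the cache. The sketch below only illustrates that per-column routing under assumed masks; the print statements stand in for the real stream wrapping and processColumnCacheData calls, and the five-column schema is invented.

public class SliceColumnRoutingSketch {
    public static void main(String[] args) {
        // Hypothetical masks for a five-column schema; column 0 is the root struct.
        boolean[] writerIncludes = { true, true, true, true, false };
        boolean[] splitIncludes  = { true, true, false, true, false };
        // hasAllData == (diskData == null) in the real method; false means we did read from disk.
        boolean hasAllData = false;

        // Column 0 is skipped: it is the root structure and has no streams of its own.
        for (int colIx = 1; colIx < writerIncludes.length; ++colIx) {
            if (!writerIncludes[colIx]) {
                continue; // the reader does not need this column at all
            }
            if (!hasAllData && splitIncludes[colIx]) {
                // Real code: take this column's CacheWriter.CacheStreamData, drop suppressed
                // streams, wrap the rest in ColumnStreamData, and register them for caching.
                System.out.println("column " + colIx + ": streams freshly read from disk");
            } else {
                // Real code: processColumnCacheData(cacheBuffers, ecb, colIx)
                System.out.println("column " + colIx + ": served from cached buffers");
            }
        }
    }
}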

Example 4 with OrcEncodedColumnBatch

Use of org.apache.hadoop.hive.ql.io.orc.encoded.Reader.OrcEncodedColumnBatch in project hive by apache.

In the class SerDeEncodedDataReader, the other overload of processOneSlice:

/**
 * Unlike the other overload of processOneSlice, doesn't cache data.
 */
private boolean processOneSlice(Vectors diskData, boolean[] splitIncludes, int stripeIx, StripeData cacheData, long startTime) throws IOException, InterruptedException {
    if (diskData == null) {
        // The other overload should have been used.
        throw new AssertionError();
    }
    // LlapIoImpl.LOG.debug("diskData " + diskData);
    logProcessOneSlice(stripeIx, diskData, cacheData);
    if (cacheData == null && diskData.getRowCount() == 0) {
        // Nothing to process.
        return true;
    }
    ColumnEncoding[] cacheEncodings = cacheData == null ? null : cacheData.getEncodings();
    LlapSerDeDataBuffer[][][] cacheBuffers = cacheData == null ? null : cacheData.getData();
    if (cacheData != null) {
        // Don't validate column count - no encodings for vectors.
        validateCacheAndDisk(cacheData, diskData.getRowCount(), -1, diskData);
    }
    SerDeStripeMetadata metadata = new SerDeStripeMetadata(stripeIx);
    metadata.setEncodings(Arrays.asList(cacheEncodings == null ? new ColumnEncoding[splitIncludes.length] : cacheEncodings));
    metadata.setRowCount(diskData.getRowCount());
    if (LlapIoImpl.LOG.isTraceEnabled()) {
        LlapIoImpl.LOG.trace("Derived stripe metadata for this split is " + metadata);
    }
    consumer.setStripeMetadata(metadata);
    OrcEncodedColumnBatch ecb = useObjectPools ? ECB_POOL.take() : new OrcEncodedColumnBatch();
    ecb.init(fileKey, metadata.getStripeIx(), OrcEncodedColumnBatch.ALL_RGS, writerIncludes.length);
    int vectorsIx = 0;
    for (int colIx = 0; colIx < writerIncludes.length; ++colIx) {
        // Skip the 0-th column, since it won't have a vector after reading the text source.
        if (colIx == 0)
            continue;
        if (!writerIncludes[colIx])
            continue;
        if (splitIncludes[colIx]) {
            List<ColumnVector> vectors = diskData.getVectors(vectorsIx++);
            if (LlapIoImpl.LOG.isTraceEnabled()) {
                LlapIoImpl.LOG.trace("Processing vectors for column " + colIx + ": " + vectors);
            }
            ecb.initColumnWithVectors(colIx, vectors);
        } else {
            ecb.initColumn(colIx, OrcEncodedColumnBatch.MAX_DATA_STREAMS);
            processColumnCacheData(cacheBuffers, ecb, colIx);
        }
    }
    if (processStop()) {
        recordReaderTime(startTime);
        return false;
    }
    return sendEcbToConsumer(ecb, cacheData != null, null);
}
Also used : ColumnEncoding(org.apache.orc.OrcProto.ColumnEncoding) OrcEncodedColumnBatch(org.apache.hadoop.hive.ql.io.orc.encoded.Reader.OrcEncodedColumnBatch) LlapSerDeDataBuffer(org.apache.hadoop.hive.llap.cache.SerDeLowLevelCacheImpl.LlapSerDeDataBuffer) SerDeStripeMetadata(org.apache.hadoop.hive.llap.io.decode.GenericColumnVectorProducer.SerDeStripeMetadata) ColumnVector(org.apache.hadoop.hive.ql.exec.vector.ColumnVector)
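
A subtle detail in this overload is the packed vectorsIx counter: diskData holds vectors only for the columns that were actually decoded from the source, so the running index advances only when splitIncludes[colIx] is true, while cache-backed columns never consume a slot. This is a self-contained sketch of just that index mapping; the masks, column count, and vector labels are invented for illustration.

import java.util.List;

public class VectorSlotMappingSketch {
    public static void main(String[] args) {
        boolean[] writerIncludes = { true, true, true, true, true };
        boolean[] splitIncludes  = { false, true, false, true, false };
        // Packed list: one entry per split-included, non-root column, in column order.
        List<String> vectors = List.of("vectors for col 1", "vectors for col 3");

        int vectorsIx = 0;
        for (int colIx = 1; colIx < writerIncludes.length; ++colIx) { // skip root column 0
            if (!writerIncludes[colIx]) {
                continue;
            }
            if (splitIncludes[colIx]) {
                // Column was decoded from the source; consume the next packed vector slot.
                System.out.println("column " + colIx + " <- " + vectors.get(vectorsIx++));
            } else {
                // Column is served from cache; no vector slot is consumed.
                System.out.println("column " + colIx + " <- cached streams");
            }
        }
    }
}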

Aggregations

OrcEncodedColumnBatch (org.apache.hadoop.hive.ql.io.orc.encoded.Reader.OrcEncodedColumnBatch)4 ColumnStreamData (org.apache.hadoop.hive.common.io.encoded.EncodedColumnBatch.ColumnStreamData)3 ColumnEncoding (org.apache.orc.OrcProto.ColumnEncoding)3 IOException (java.io.IOException)2 DiskRangeList (org.apache.hadoop.hive.common.io.DiskRangeList)2 CreateHelper (org.apache.hadoop.hive.common.io.DiskRangeList.CreateHelper)2 LlapSerDeDataBuffer (org.apache.hadoop.hive.llap.cache.SerDeLowLevelCacheImpl.LlapSerDeDataBuffer)2 SerDeStripeMetadata (org.apache.hadoop.hive.llap.io.decode.GenericColumnVectorProducer.SerDeStripeMetadata)2 OrcProto (org.apache.orc.OrcProto)2 OutStream (org.apache.orc.impl.OutStream)2 CodedInputStream (com.google.protobuf.CodedInputStream)1 InputStream (java.io.InputStream)1 ByteBuffer (java.nio.ByteBuffer)1 IdentityHashMap (java.util.IdentityHashMap)1 AtomicBoolean (java.util.concurrent.atomic.AtomicBoolean)1 BooleanRef (org.apache.hadoop.hive.common.io.DataCache.BooleanRef)1 MutateHelper (org.apache.hadoop.hive.common.io.DiskRangeList.MutateHelper)1 MemoryBuffer (org.apache.hadoop.hive.common.io.encoded.MemoryBuffer)1 StripeData (org.apache.hadoop.hive.llap.cache.SerDeLowLevelCacheImpl.StripeData)1 ColumnVector (org.apache.hadoop.hive.ql.exec.vector.ColumnVector)1