Search in sources:

Example 1 with BooleanRef

Use of org.apache.hadoop.hive.common.io.DataCache.BooleanRef in project hive by apache.

From the class SerDeEncodedDataReader, the method readFileWithCache:

public Boolean readFileWithCache(long startTime) throws IOException, InterruptedException {
    if (fileKey == null)
        return false;
    BooleanRef gotAllData = new BooleanRef();
    long endOfSplit = split.getStart() + split.getLength();
    this.cachedData = cache.getFileData(fileKey, split.getStart(), endOfSplit, writerIncludes, CC_FACTORY, counters, gotAllData);
    if (cachedData == null) {
        if (LlapIoImpl.CACHE_LOGGER.isTraceEnabled()) {
            LlapIoImpl.CACHE_LOGGER.trace("No data for the split found in cache");
        }
        return false;
    }
    String[] hosts = extractHosts(split, false), inMemoryHosts = extractHosts(split, true);
    List<StripeData> slices = cachedData.getData();
    if (slices.isEmpty())
        return false;
    long uncachedPrefixEnd = slices.get(0).getKnownTornStart(), uncachedSuffixStart = slices.get(slices.size() - 1).getLastEnd();
    Ref<Integer> stripeIx = Ref.from(0);
    if (uncachedPrefixEnd > split.getStart()) {
        // TODO: can we merge neighboring splits? So we don't init so many readers.
        FileSplit sliceSplit = new FileSplit(split.getPath(), split.getStart(), uncachedPrefixEnd - split.getStart(), hosts, inMemoryHosts);
        if (!processOneFileSplit(sliceSplit, startTime, stripeIx, null))
            return null;
    }
    while (!slices.isEmpty()) {
        // Pop the head slice so the loop terminates once every cached slice has been processed.
        StripeData slice = slices.remove(0);
        long start = slice.getKnownTornStart();
        // Will also read the last row.
        long len = slice.getLastStart() - start;
        FileSplit sliceSplit = new FileSplit(split.getPath(), start, len, hosts, inMemoryHosts);
        if (!processOneFileSplit(sliceSplit, startTime, stripeIx, slice))
            return null;
    }
    boolean isUnfortunate = false;
    if (uncachedSuffixStart == endOfSplit) {
        // This is rather obscure. The end of last row cached is precisely at the split end offset.
        // If the split is in the middle of the file, LRR would read one more row after that,
        // therefore as unfortunate as it is, we have to do a one-row read. However, for that to
        // have happened, someone should have supplied a split that ends inside the last row, i.e.
        // a few bytes earlier than the current split, which is pretty unlikely. What is more likely
        // is that the split, and the last row, both end at the end of file. Check for this.
        long size = split.getPath().getFileSystem(daemonConf).getFileStatus(split.getPath()).getLen();
        isUnfortunate = size > endOfSplit;
        if (isUnfortunate) {
            // Log at warn, given how unfortunate this is.
            LlapIoImpl.LOG.warn("One-row mismatch at the end of split " + split.getPath() + " at " + endOfSplit + "; file size is " + size);
        }
    }
    if (uncachedSuffixStart < endOfSplit || isUnfortunate) {
        // Note: we assume a 0-length split is correct given how LRR interprets offsets (reading an
        // extra row). Should we instead assume 1+ chars and add 1 for isUnfortunate?
        FileSplit splitPart = new FileSplit(split.getPath(), uncachedSuffixStart, endOfSplit - uncachedSuffixStart, hosts, inMemoryHosts);
        if (!processOneFileSplit(splitPart, startTime, stripeIx, null))
            return null;
    }
    return true;
}
Also used: StripeData (org.apache.hadoop.hive.llap.cache.SerDeLowLevelCacheImpl.StripeData), BooleanRef (org.apache.hadoop.hive.common.io.DataCache.BooleanRef), FileSplit (org.apache.hadoop.mapred.FileSplit)
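
In this snippet, BooleanRef acts as a mutable out-parameter: the cache lookup writes into its public value field (gotAllData) while returning whatever data it found, and the caller then decides whether a disk read is still needed. Below is a minimal, self-contained sketch of that pattern; FakeCache and its lookup method are hypothetical stand-ins rather than the Hive cache API, and only BooleanRef itself comes from the Hive storage-api classes.

import org.apache.hadoop.hive.common.io.DataCache.BooleanRef;

// Minimal sketch of the out-parameter pattern, assuming only that BooleanRef
// exposes a public mutable "value" field (as the snippet above relies on).
public class BooleanRefSketch {

    // Hypothetical cache stand-in; not the Hive DataCache/SerDeLowLevelCacheImpl API.
    static final class FakeCache {
        private final long cachedUpTo;

        FakeCache(long cachedUpTo) {
            this.cachedUpTo = cachedUpTo;
        }

        // Returns the cached prefix of [start, end) and reports via gotAllData
        // whether the whole range was served from cache.
        byte[] lookup(long start, long end, BooleanRef gotAllData) {
            gotAllData.value = end <= cachedUpTo;
            long served = Math.min(end, cachedUpTo) - start;
            return served > 0 ? new byte[(int) served] : null;
        }
    }

    public static void main(String[] args) {
        FakeCache cache = new FakeCache(1024);
        BooleanRef gotAllData = new BooleanRef();
        byte[] data = cache.lookup(0, 2048, gotAllData);
        if (data == null || !gotAllData.value) {
            // Same shape as readFileWithCache: fall back to reading the missing bytes from the file.
            System.out.println("cache miss or partial hit; would read the remainder from disk");
        } else {
            System.out.println("entire range served from cache");
        }
    }
}

The out-parameter keeps the method's return value free for the data itself, which is presumably why the Hive API takes this shape instead of returning a pair.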

Example 2 with BooleanRef

Use of org.apache.hadoop.hive.common.io.DataCache.BooleanRef in project hive by apache.

From the class EncodedReaderImpl, the method getDataFromCacheAndDisk:

private DiskRangeList.MutateHelper getDataFromCacheAndDisk(DiskRangeList listToRead, long stripeOffset, boolean hasFileId, IdentityHashMap<ByteBuffer, Boolean> toRelease) throws IOException {
    DiskRangeList.MutateHelper toRead = new DiskRangeList.MutateHelper(listToRead);
    if (LOG.isInfoEnabled()) {
        LOG.info("Resulting disk ranges to read (file " + fileKey + "): " + RecordReaderUtils.stringifyDiskRanges(toRead.next));
    }
    BooleanRef isAllInCache = new BooleanRef();
    if (hasFileId) {
        cacheWrapper.getFileData(fileKey, toRead.next, stripeOffset, CC_FACTORY, isAllInCache);
        if (LOG.isInfoEnabled()) {
            LOG.info("Disk ranges after cache (found everything " + isAllInCache.value + "; file " + fileKey + ", base offset " + stripeOffset + "): " + RecordReaderUtils.stringifyDiskRanges(toRead.next));
        }
        trace.logRanges(fileKey, stripeOffset, toRead.next, RangesSrc.CACHE);
    }
    // If the cache did not cover everything, read the rest from disk and track the disk buffers
    // in toRelease; a buffer that can be freed in advance is removed from that map.
    if (!isAllInCache.value) {
        boolean hasError = true;
        try {
            if (!isDataReaderOpen) {
                this.dataReader.open();
                isDataReaderOpen = true;
            }
            dataReader.readFileData(toRead.next, stripeOffset, cacheWrapper.getAllocator().isDirectAlloc());
            toRelease = new IdentityHashMap<>();
            DiskRangeList drl = toRead.next;
            while (drl != null) {
                if (drl instanceof BufferChunk) {
                    toRelease.put(drl.getData(), true);
                }
                drl = drl.next;
            }
            hasError = false;
        } finally {
            // We are assuming here that toRelease will not be present in such cases.
            if (hasError) {
                releaseInitialRefcounts(toRead.next);
            }
        }
    }
    return toRead;
}
Also used: MutateHelper (org.apache.hadoop.hive.common.io.DiskRangeList.MutateHelper), DiskRangeList (org.apache.hadoop.hive.common.io.DiskRangeList), BooleanRef (org.apache.hadoop.hive.common.io.DataCache.BooleanRef), BufferChunk (org.apache.orc.impl.BufferChunk)
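
The structure of getDataFromCacheAndDisk is cache-first with a disk fallback, and the hasError flag in the finally block ensures partially acquired buffers are released if the disk read throws, while still letting the exception propagate. The sketch below reproduces just that control flow under stated assumptions: CacheStub, DiskStub and release() are hypothetical stand-ins for cacheWrapper.getFileData, dataReader.readFileData and releaseInitialRefcounts; only BooleanRef is the real Hive type.

import java.io.IOException;

import org.apache.hadoop.hive.common.io.DataCache.BooleanRef;

// Self-contained sketch of the cache-then-disk shape; everything except
// BooleanRef is a hypothetical stand-in for the real reader machinery.
public class CacheThenDiskSketch {

    // Returns a buffer for the range and reports via gotAllData whether it was fully populated.
    interface CacheStub { byte[] get(long offset, int length, BooleanRef gotAllData); }

    interface DiskStub { void readInto(byte[] buffer, long offset) throws IOException; }

    static byte[] readWithFallback(CacheStub cache, DiskStub disk, long offset, int length)
            throws IOException {
        BooleanRef isAllInCache = new BooleanRef();
        byte[] buffer = cache.get(offset, length, isAllInCache);
        if (isAllInCache.value) {
            return buffer;           // everything was cached, no disk I/O needed
        }
        boolean hasError = true;     // same cleanup shape as getDataFromCacheAndDisk
        try {
            disk.readInto(buffer, offset);
            hasError = false;
        } finally {
            if (hasError) {
                release(buffer);     // mirrors releaseInitialRefcounts on failure
            }
        }
        return buffer;
    }

    static void release(byte[] buffer) {
        // In the real reader this would return cache buffers / drop refcounts; here it is a no-op.
    }

    public static void main(String[] args) throws IOException {
        CacheStub cache = (off, len, gotAll) -> { gotAll.value = false; return new byte[len]; };
        DiskStub disk = (buf, off) -> { /* pretend to read the uncached ranges from disk */ };
        System.out.println(readWithFallback(cache, disk, 0, 128).length);
    }
}

Using a hasError flag instead of a catch block keeps the cleanup in finally for any throwable while leaving the original exception untouched for the caller.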

Example 3 with BooleanRef

Use of org.apache.hadoop.hive.common.io.DataCache.BooleanRef in project hive by apache.

From the class EncodedReaderImpl, the method readEncodedColumns:

@Override
public void readEncodedColumns(int stripeIx, StripeInformation stripe, OrcProto.RowIndex[] indexes, List<OrcProto.ColumnEncoding> encodings, List<OrcProto.Stream> streamList, boolean[] included, boolean[][] colRgs, Consumer<OrcEncodedColumnBatch> consumer) throws IOException {
    // Note: for now we don't have to setError here, caller will setError if we throw.
    // We are also not supposed to call setDone, since we are only part of the operation.
    long stripeOffset = stripe.getOffset();
    // 1. Figure out what we have to read.
    // Stream offset in relation to the stripe.
    long offset = 0;
    // 1.1. Figure out which columns have a present stream
    boolean[] hasNull = RecordReaderUtils.findPresentStreamsByColumn(streamList, types);
    if (isTracingEnabled) {
        LOG.trace("The following columns have PRESENT streams: " + arrayToString(hasNull));
    }
    // We assume stream list is sorted by column and that non-data
    // streams do not interleave data streams for the same column.
    // 1.2. With that in mind, determine disk ranges to read/get from cache (not by stream).
    ColumnReadContext[] colCtxs = new ColumnReadContext[included.length];
    int colRgIx = -1;
    // Don't create a context for column 0 (the root struct column).
    for (int i = 1; i < included.length; ++i) {
        if (!included[i])
            continue;
        colCtxs[i] = new ColumnReadContext(i, encodings.get(i), indexes[i], ++colRgIx);
        if (isTracingEnabled) {
            LOG.trace("Creating context: " + colCtxs[i].toString());
        }
    }
    boolean isCompressed = (codec != null);
    CreateHelper listToRead = new CreateHelper();
    boolean hasIndexOnlyCols = false;
    // Will always be the same for all cols at the moment.
    boolean[] includedRgs = null;
    for (OrcProto.Stream stream : streamList) {
        long length = stream.getLength();
        int colIx = stream.getColumn();
        OrcProto.Stream.Kind streamKind = stream.getKind();
        if (!included[colIx] || StreamName.getArea(streamKind) != StreamName.Area.DATA) {
            // We have a stream for an included column, but in the future it might have no data streams.
            // It's more like "has at least one column included that has an index stream".
            hasIndexOnlyCols = hasIndexOnlyCols || included[colIx];
            if (isTracingEnabled) {
                LOG.trace("Skipping stream for column " + colIx + ": " + streamKind + " at " + offset + ", " + length);
            }
            offset += length;
            continue;
        }
        ColumnReadContext ctx = colCtxs[colIx];
        assert ctx != null;
        includedRgs = colRgs[ctx.includedIx];
        int indexIx = RecordReaderUtils.getIndexPosition(ctx.encoding.getKind(), types.get(colIx).getKind(), streamKind, isCompressed, hasNull[colIx]);
        ctx.addStream(offset, stream, indexIx);
        if (isTracingEnabled) {
            LOG.trace("Adding stream for column " + colIx + ": " + streamKind + " at " + offset + ", " + length + ", index position " + indexIx);
        }
        if (includedRgs == null || RecordReaderUtils.isDictionary(streamKind, encodings.get(colIx))) {
            RecordReaderUtils.addEntireStreamToRanges(offset, length, listToRead, true);
            if (isTracingEnabled) {
                LOG.trace("Will read whole stream " + streamKind + "; added to " + listToRead.getTail());
            }
        } else {
            RecordReaderUtils.addRgFilteredStreamToRanges(stream, includedRgs, codec != null, indexes[colIx], encodings.get(colIx), types.get(colIx), bufferSize, hasNull[colIx], offset, length, listToRead, true);
        }
        offset += length;
    }
    boolean hasFileId = this.fileKey != null;
    if (listToRead.get() == null) {
        // TODO: there may be a bug here. Could there be partial RG filtering on index-only column?
        if (hasIndexOnlyCols && (includedRgs == null)) {
            OrcEncodedColumnBatch ecb = POOLS.ecbPool.take();
            ecb.init(fileKey, stripeIx, OrcEncodedColumnBatch.ALL_RGS, included.length);
            consumer.consumeData(ecb);
        } else {
            LOG.warn("Nothing to read for stripe [" + stripe + "]");
        }
        return;
    }
    // 2. Now, read all of the ranges from cache or disk.
    DiskRangeList.MutateHelper toRead = new DiskRangeList.MutateHelper(listToRead.get());
    if (/*isTracingEnabled && */ LOG.isInfoEnabled()) {
        LOG.info("Resulting disk ranges to read (file " + fileKey + "): " + RecordReaderUtils.stringifyDiskRanges(toRead.next));
    }
    BooleanRef isAllInCache = new BooleanRef();
    if (hasFileId) {
        cacheWrapper.getFileData(fileKey, toRead.next, stripeOffset, CC_FACTORY, isAllInCache);
        if (/*isTracingEnabled && */ LOG.isInfoEnabled()) {
            LOG.info("Disk ranges after cache (found everything " + isAllInCache.value + "; file " + fileKey + ", base offset " + stripeOffset + "): " + RecordReaderUtils.stringifyDiskRanges(toRead.next));
        }
    }
    if (!isAllInCache.value) {
        if (!isDataReaderOpen) {
            this.dataReader.open();
            isDataReaderOpen = true;
        }
        dataReader.readFileData(toRead.next, stripeOffset, cacheWrapper.getAllocator().isDirectAlloc());
    }
    // 3. For uncompressed case, we need some special processing before read.
    // Keep "toRead" list for future use, don't extract().
    DiskRangeList iter = toRead.next;
    if (codec == null) {
        for (int colIx = 0; colIx < colCtxs.length; ++colIx) {
            ColumnReadContext ctx = colCtxs[colIx];
            // This column is not included.
            if (ctx == null)
                continue;
            for (int streamIx = 0; streamIx < ctx.streamCount; ++streamIx) {
                StreamContext sctx = ctx.streams[streamIx];
                DiskRangeList newIter = preReadUncompressedStream(stripeOffset, iter, sctx.offset, sctx.offset + sctx.length);
                if (newIter != null) {
                    iter = newIter;
                }
            }
        }
        if (isTracingEnabled) {
            LOG.trace("Disk ranges after pre-read (file " + fileKey + ", base offset " + stripeOffset + "): " + RecordReaderUtils.stringifyDiskRanges(toRead.next));
        }
        // Reset the iter to start.
        iter = toRead.next;
    }
    // 4. Finally, decompress data, map per RG, and return to caller.
    // We go by RG and not by column because that is how data is processed.
    int rgCount = (int) Math.ceil((double) stripe.getNumberOfRows() / rowIndexStride);
    for (int rgIx = 0; rgIx < rgCount; ++rgIx) {
        boolean isLastRg = rgIx == rgCount - 1;
        // Create the batch we will use to return data for this RG.
        OrcEncodedColumnBatch ecb = POOLS.ecbPool.take();
        ecb.init(fileKey, stripeIx, rgIx, included.length);
        boolean isRGSelected = true;
        for (int colIx = 0; colIx < colCtxs.length; ++colIx) {
            ColumnReadContext ctx = colCtxs[colIx];
            // This column is not included.
            if (ctx == null)
                continue;
            if (isTracingEnabled) {
                LOG.trace("ctx: {} rgIx: {} isLastRg: {} rgCount: {}", ctx, rgIx, isLastRg, rgCount);
            }
            // TODO: simplify this now that high-level cache has been removed. Same RGs for all cols.
            if (colRgs[ctx.includedIx] != null && !colRgs[ctx.includedIx][rgIx]) {
                // RG x col filtered.
                isRGSelected = false;
                if (isTracingEnabled) {
                    LOG.trace("colIxMod: {} rgIx: {} colRgs[{}]: {} colRgs[{}][{}]: {}", ctx.includedIx, rgIx, ctx.includedIx, Arrays.toString(colRgs[ctx.includedIx]), ctx.includedIx, rgIx, colRgs[ctx.includedIx][rgIx]);
                }
                continue;
            }
            OrcProto.RowIndexEntry index = ctx.rowIndex.getEntry(rgIx), nextIndex = isLastRg ? null : ctx.rowIndex.getEntry(rgIx + 1);
            ecb.initOrcColumn(ctx.colIx);
            for (int streamIx = 0; streamIx < ctx.streamCount; ++streamIx) {
                StreamContext sctx = ctx.streams[streamIx];
                ColumnStreamData cb = null;
                try {
                    if (RecordReaderUtils.isDictionary(sctx.kind, ctx.encoding)) {
                        // This stream is for entire stripe and needed for every RG; uncompress once and reuse.
                        if (isTracingEnabled) {
                            LOG.trace("Getting stripe-level stream [" + sctx.kind + ", " + ctx.encoding + "] for" + " column " + ctx.colIx + " RG " + rgIx + " at " + sctx.offset + ", " + sctx.length);
                        }
                        if (sctx.stripeLevelStream == null) {
                            sctx.stripeLevelStream = POOLS.csdPool.take();
                            // We will be using this for each RG while also sending RGs to processing.
                            // To avoid buffers being unlocked, run refcount one ahead; we will not increase
                            // it when building the last RG, so each RG processing will decref once, and the
                            // last one will unlock the buffers.
                            sctx.stripeLevelStream.incRef();
                            // For stripe-level streams we don't need the extra refcount on the block.
                            // See class comment about refcounts.
                            long unlockUntilCOffset = sctx.offset + sctx.length;
                            DiskRangeList lastCached = readEncodedStream(stripeOffset, iter, sctx.offset, sctx.offset + sctx.length, sctx.stripeLevelStream, unlockUntilCOffset, sctx.offset);
                            if (lastCached != null) {
                                iter = lastCached;
                            }
                        }
                        if (!isLastRg) {
                            sctx.stripeLevelStream.incRef();
                        }
                        cb = sctx.stripeLevelStream;
                    } else {
                        // This stream can be separated by RG using index. Let's do that.
                        // Offset to where this RG begins.
                        long cOffset = sctx.offset + index.getPositions(sctx.streamIndexOffset);
                        // Offset relative to the beginning of the stream of where this RG ends.
                        long nextCOffsetRel = isLastRg ? sctx.length : nextIndex.getPositions(sctx.streamIndexOffset);
                        // Offset before which this RG is guaranteed to end. Can only be estimated.
                        // We estimate the same way for compressed and uncompressed for now.
                        long endCOffset = sctx.offset + RecordReaderUtils.estimateRgEndOffset(isCompressed, isLastRg, nextCOffsetRel, sctx.length, bufferSize);
                        // As we read, we can unlock initial refcounts for the buffers that end before
                        // the data that we need for this RG.
                        long unlockUntilCOffset = sctx.offset + nextCOffsetRel;
                        cb = createRgColumnStreamData(rgIx, isLastRg, ctx.colIx, sctx, cOffset, endCOffset, isCompressed);
                        boolean isStartOfStream = sctx.bufferIter == null;
                        DiskRangeList lastCached = readEncodedStream(stripeOffset, (isStartOfStream ? iter : sctx.bufferIter), cOffset, endCOffset, cb, unlockUntilCOffset, sctx.offset);
                        if (lastCached != null) {
                            sctx.bufferIter = iter = lastCached;
                        }
                    }
                    ecb.setStreamData(ctx.colIx, sctx.kind.getNumber(), cb);
                } catch (Exception ex) {
                    DiskRangeList drl = toRead == null ? null : toRead.next;
                    LOG.error("Error getting stream [" + sctx.kind + ", " + ctx.encoding + "] for" + " column " + ctx.colIx + " RG " + rgIx + " at " + sctx.offset + ", " + sctx.length + "; toRead " + RecordReaderUtils.stringifyDiskRanges(drl), ex);
                    throw (ex instanceof IOException) ? (IOException) ex : new IOException(ex);
                }
            }
        }
        if (isRGSelected) {
            consumer.consumeData(ecb);
        }
    }
    if (isTracingEnabled) {
        LOG.trace("Disk ranges after preparing all the data " + RecordReaderUtils.stringifyDiskRanges(toRead.next));
    }
    // Release the unreleased buffers. See class comment about refcounts.
    releaseInitialRefcounts(toRead.next);
    releaseCacheChunksIntoObjectPool(toRead.next);
}
Also used: DiskRangeList (org.apache.hadoop.hive.common.io.DiskRangeList), OrcProto (org.apache.orc.OrcProto), BooleanRef (org.apache.hadoop.hive.common.io.DataCache.BooleanRef), IOException (java.io.IOException), CreateHelper (org.apache.hadoop.hive.common.io.DiskRangeList.CreateHelper), OrcEncodedColumnBatch (org.apache.hadoop.hive.ql.io.orc.encoded.Reader.OrcEncodedColumnBatch), OutStream (org.apache.orc.impl.OutStream), ColumnStreamData (org.apache.hadoop.hive.common.io.encoded.EncodedColumnBatch.ColumnStreamData)
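
One easy-to-miss detail in step 4 of readEncodedColumns is the row-group arithmetic: the stripe is split into rgCount groups of rowIndexStride rows each, rounded up, so the last group is usually shorter and gets special handling (isLastRg). The standalone sketch below replays only that calculation with made-up numbers; the 10000-row stride matches ORC's default orc.row.index.stride, and everything else is hypothetical.

// Standalone illustration of the rgCount computation used in step 4 above.
public class RowGroupCountSketch {
    public static void main(String[] args) {
        long numberOfRows = 123_456;   // rows in the stripe (made-up example value)
        int rowIndexStride = 10_000;   // rows per row group (ORC default)

        int rgCount = (int) Math.ceil((double) numberOfRows / rowIndexStride);
        System.out.println("row groups: " + rgCount);   // prints 13

        for (int rgIx = 0; rgIx < rgCount; ++rgIx) {
            boolean isLastRg = rgIx == rgCount - 1;
            long firstRow = (long) rgIx * rowIndexStride;
            long rowsInRg = isLastRg ? numberOfRows - firstRow : rowIndexStride;
            // The reader above builds one OrcEncodedColumnBatch per rgIx and only looks up
            // nextIndex when !isLastRg, which is why isLastRg is tracked explicitly.
            System.out.println("rg " + rgIx + ": rows " + firstRow + ".." + (firstRow + rowsInRg - 1));
        }
    }
}

Everything else in step 4 (stream offsets, refcounts) hangs off this per-RG loop, with stripe-level dictionary streams read once and ref-counted one ahead so that the last RG's decref releases them.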

Aggregations

BooleanRef (org.apache.hadoop.hive.common.io.DataCache.BooleanRef): 3
DiskRangeList (org.apache.hadoop.hive.common.io.DiskRangeList): 2
IOException (java.io.IOException): 1
CreateHelper (org.apache.hadoop.hive.common.io.DiskRangeList.CreateHelper): 1
MutateHelper (org.apache.hadoop.hive.common.io.DiskRangeList.MutateHelper): 1
ColumnStreamData (org.apache.hadoop.hive.common.io.encoded.EncodedColumnBatch.ColumnStreamData): 1
StripeData (org.apache.hadoop.hive.llap.cache.SerDeLowLevelCacheImpl.StripeData): 1
OrcEncodedColumnBatch (org.apache.hadoop.hive.ql.io.orc.encoded.Reader.OrcEncodedColumnBatch): 1
FileSplit (org.apache.hadoop.mapred.FileSplit): 1
OrcProto (org.apache.orc.OrcProto): 1
BufferChunk (org.apache.orc.impl.BufferChunk): 1
OutStream (org.apache.orc.impl.OutStream): 1