Search in sources :

Example 1 with FooterBuffer

Use of org.apache.hadoop.hive.ql.exec.FooterBuffer in the Apache Hive project.

The doNext method of the class HiveContextAwareRecordReader.

/**
 * Advances the underlying {@code recordReader} by one record, first applying the
 * sorted-input binary search (when {@code isSorted} is set) and the header/footer
 * handling that is performed when a new file is started.
 *
 * <p>The method proceeds in two phases:
 * <ol>
 *   <li>If the input is sorted and the IO context asks for a sorted search, either take one
 *       more binary-search step (narrowing {@code rangeStart}/{@code rangeEnd} and re-syncing
 *       the reader), or stop early once {@code foundAllTargets()} reports true.</li>
 *   <li>If the current block starts at offset 0, look up the table description for the input
 *       path, skip the configured header rows and, when footer rows are configured, set up a
 *       {@code FooterBuffer}. Then read the next record, either directly or through that
 *       buffer.</li>
 * </ol>
 *
 * @param key   key object passed to the underlying reader to be filled in
 * @param value value object passed to the underlying reader to be filled in
 * @return {@code true} if a record was read; {@code false} if the reader is exhausted, if all
 *         targets of a sorted search were already found, or if header skipping / footer
 *         buffer initialization reported that no records remain. When reading fails, the
 *         result of {@code HiveIOExceptionHandlerUtil.handleRecordReaderNextException} is
 *         returned instead.
 * @throws IOException declared for callers; NOTE(review): presumably raised by the exception
 *         handler when the failure is not recoverable - confirm against that utility
 */
public boolean doNext(K key, V value) throws IOException {
    if (this.isSorted) {
        // Fall back to a linear scan when the binary search was told to stop, or when the
        // context no longer wants a sorted search although we had been doing one.
        if (this.getIOContext().shouldEndBinarySearch() || (!this.getIOContext().useSorted() && this.wasUsingSortedSearch)) {
            beginLinearSearch();
            this.wasUsingSortedSearch = false;
            this.getIOContext().setEndBinarySearch(false);
        }
        if (this.getIOContext().useSorted()) {
            // Pick up the UDF class name from the context the first time it becomes available.
            if (this.genericUDFClassName == null && this.getIOContext().getGenericUDFClassName() != null) {
                setGenericUDFClassName(this.getIOContext().getGenericUDFClassName());
            }
            if (this.getIOContext().isBinarySearching()) {
                // Proceed with a binary search: narrow the range using the outcome of the
                // comparison recorded for the previously probed position.
                if (this.getIOContext().getComparison() != null) {
                    switch(this.getIOContext().getComparison()) {
                        case GREATER:
                        case EQUAL:
                            // Indexes have only one entry per value, could go linear from here, if we want to
                            // use this for any sorted table, we'll need to continue the search
                            rangeEnd = previousPosition;
                            break;
                        case LESS:
                            rangeStart = previousPosition;
                            break;
                        default:
                            break;
                    }
                }
                // Unsigned shift instead of "/ 2": identical for every non-negative sum, and
                // still yields the correct midpoint if rangeStart + rangeEnd wraps around.
                long position = (rangeStart + rangeEnd) >>> 1;
                sync(position);
                long newPosition = getSyncedPosition();
                // If the synced position did not move, or it is at/after the end of the split,
                // matching rows must be in the final block, so we can end the binary search.
                if (newPosition == previousPosition || newPosition >= splitEnd) {
                    this.getIOContext().setBinarySearching(false);
                    sync(rangeStart);
                }
                previousPosition = newPosition;
            } else if (foundAllTargets()) {
                // Found all possible rows which will not be filtered
                return false;
            }
        }
    }
    try {
        /*
         * When start reading new file, check header, footer rows.
         * If file contains header, skip header lines before reading the records.
         * If file contains footer, used a FooterBuffer to remove footer lines
         * at the end of the table file.
         */
        if (this.ioCxtRef.getCurrentBlockStart() == 0) {
            // Check if the table file has header to skip.
            footerBuffer = null;
            Path filePath = this.ioCxtRef.getInputPath();
            // Stays null when the lookup below fails; that is treated as "no table description".
            PartitionDesc part = null;
            try {
                // Fetch the path-to-partition mapping once and reuse it on later calls.
                if (pathToPartitionInfo == null) {
                    pathToPartitionInfo = Utilities.getMapWork(jobConf).getPathToPartitionInfo();
                }
                part = HiveFileFormatUtils.getFromPathRecursively(pathToPartitionInfo, filePath, IOPrepareCache.get().getPartitionDescMap());
            } catch (AssertionError ae) {
                // Best effort: a failed lookup is only logged and reading continues.
                LOG.info("Cannot get partition description from " + filePath + " because " + ae.getMessage());
            } catch (Exception e) {
                // Best effort: a failed lookup is only logged and reading continues.
                LOG.info("Cannot get partition description from " + filePath + " because " + e.getMessage());
            }
            TableDesc table = (part == null) ? null : part.getTableDesc();
            if (table != null) {
                headerCount = Utilities.getHeaderCount(table);
                footerCount = Utilities.getFooterCount(table, jobConf);
            }
            // If input contains header, skip header.
            if (!Utilities.skipHeader(recordReader, headerCount, (WritableComparable) key, (Writable) value)) {
                return false;
            }
            if (footerCount > 0) {
                footerBuffer = new FooterBuffer();
                if (!footerBuffer.initializeBuffer(jobConf, recordReader, footerCount, (WritableComparable) key, (Writable) value)) {
                    return false;
                }
            }
        }
        if (footerBuffer == null) {
            // Table files don't have footer rows.
            return recordReader.next(key, value);
        } else {
            // Read through the footer buffer so the configured footer rows are held back.
            return footerBuffer.updateBuffer(jobConf, recordReader, (WritableComparable) key, (Writable) value);
        }
    } catch (Exception e) {
        // Delegate the decision (recover vs. rethrow) to the configured exception handler.
        return HiveIOExceptionHandlerUtil.handleRecordReaderNextException(e, jobConf);
    }
}
Also used : Path(org.apache.hadoop.fs.Path) WritableComparable(org.apache.hadoop.io.WritableComparable) Writable(org.apache.hadoop.io.Writable) PartitionDesc(org.apache.hadoop.hive.ql.plan.PartitionDesc) TableDesc(org.apache.hadoop.hive.ql.plan.TableDesc) IOException(java.io.IOException) FooterBuffer(org.apache.hadoop.hive.ql.exec.FooterBuffer)

Aggregations

IOException (java.io.IOException)1 Path (org.apache.hadoop.fs.Path)1 FooterBuffer (org.apache.hadoop.hive.ql.exec.FooterBuffer)1 PartitionDesc (org.apache.hadoop.hive.ql.plan.PartitionDesc)1 TableDesc (org.apache.hadoop.hive.ql.plan.TableDesc)1 Writable (org.apache.hadoop.io.Writable)1 WritableComparable (org.apache.hadoop.io.WritableComparable)1