Example 1 with OrcFileValueWrapper

Use of org.apache.hadoop.hive.ql.io.orc.OrcFileValueWrapper in the Apache Hive project.

From the class OrcFileMergeOperator, method processKeyValuePairs:

private void processKeyValuePairs(Object key, Object value) throws HiveException {
    String filePath = "";
    try {
        OrcFileValueWrapper v;
        OrcFileKeyWrapper k;
        if (key instanceof CombineHiveKey) {
            k = (OrcFileKeyWrapper) ((CombineHiveKey) key).getKey();
        } else {
            k = (OrcFileKeyWrapper) key;
        }
        // skip incompatible files; a file missing stripe statistics is marked incompatible
        if (k.isIncompatFile()) {
            LOG.warn("Incompatible ORC file merge! Stripe statistics is missing. " + k.getInputPath());
            incompatFileSet.add(k.getInputPath());
            return;
        }
        filePath = k.getInputPath().toUri().getPath();
        fixTmpPath(k.getInputPath().getParent());
        v = (OrcFileValueWrapper) value;
        if (prevPath == null) {
            prevPath = k.getInputPath();
            reader = OrcFile.createReader(fs, k.getInputPath());
            if (isLogInfoEnabled) {
                LOG.info("ORC merge file input path: " + k.getInputPath());
            }
        }
        // store the ORC configuration from the first file; every subsequent file must
        // match this configuration or it will not be merged
        if (outWriter == null) {
            compression = k.getCompression();
            compressBuffSize = k.getCompressBufferSize();
            version = k.getVersion();
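            // type 0 is the root struct, so its subtype count is the number of top-level columns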
            columnCount = k.getTypes().get(0).getSubtypesCount();
            rowIndexStride = k.getRowIndexStride();
            OrcFile.WriterOptions options = OrcFile.writerOptions(jc).compress(compression).version(version).rowIndexStride(rowIndexStride).inspector(reader.getObjectInspector());
            // compression buffer size should only be set if compression is enabled
            if (compression != CompressionKind.NONE) {
                // enforce is required to retain the buffer sizes of old files instead of orc writer
                // inferring the optimal buffer size
                options.bufferSize(compressBuffSize).enforceBufferSize();
            }
            outWriter = OrcFile.createWriter(outPath, options);
            if (isLogDebugEnabled) {
                LOG.debug("ORC merge file output path: " + outPath);
            }
        }
        if (!checkCompatibility(k)) {
            incompatFileSet.add(k.getInputPath());
            return;
        }
        // a different input file than the previous stripe: open a new reader,
        // and remember the path so the reader is reused for its remaining stripes
        if (!k.getInputPath().equals(prevPath)) {
            prevPath = k.getInputPath();
            reader = OrcFile.createReader(fs, k.getInputPath());
        }
        // initialize buffer to read the entire stripe
        byte[] buffer = new byte[(int) v.getStripeInformation().getLength()];
        fdis = fs.open(k.getInputPath());
        fdis.readFully(v.getStripeInformation().getOffset(), buffer, 0, (int) v.getStripeInformation().getLength());
        // append the stripe buffer to the new ORC file
        outWriter.appendStripe(buffer, 0, buffer.length, v.getStripeInformation(), v.getStripeStatistics());
        if (isLogInfoEnabled) {
            LOG.info("Merged stripe from file " + k.getInputPath() + " [ offset : " + v.getStripeInformation().getOffset() + " length: " + v.getStripeInformation().getLength() + " row: " + v.getStripeStatistics().getColStats(0).getNumberOfValues() + " ]");
        }
        // on the last stripe of an input file, carry its user metadata into the output footer
        if (v.isLastStripeInFile()) {
            outWriter.appendUserMetadata(v.getUserMetadata());
        }
    } catch (Throwable e) {
        this.exception = true;
        LOG.error("Closing operator..Exception: " + ExceptionUtils.getStackTrace(e));
        throw new HiveException(e);
    } finally {
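        // the catch block above recorded the failure; close the operator in abort mode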
        if (exception) {
            closeOp(true);
        }
        if (fdis != null) {
            try {
                fdis.close();
            } catch (IOException e) {
                throw new HiveException(String.format("Unable to close file %s", filePath), e);
            } finally {
                fdis = null;
            }
        }
    }
}
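
The merge never decodes rows: each stripe is copied byte-for-byte and stitched into the new file via appendStripe, which is why all of the writer settings above must match the first input file. The helper below is a minimal sketch of that copy step, not Hive's implementation; copyStripe is a hypothetical name, and the StripeInformation and OrcProto.StripeStatistics arguments are assumed to come from the OrcFileValueWrapper of the stripe being merged, exactly as in processKeyValuePairs above. The import packages assume a Hive release where the ORC core types live under org.apache.orc; older releases keep StripeInformation and OrcProto under org.apache.hadoop.hive.ql.io.orc.

import java.io.IOException;

import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.ql.io.orc.Writer;
import org.apache.orc.OrcProto;
import org.apache.orc.StripeInformation;

public final class StripeCopySketch {

    // Hypothetical helper: copy one raw stripe from src into an already-open
    // ORC writer, mirroring the buffer/readFully/appendStripe sequence above.
    static void copyStripe(FileSystem fs, Path src, StripeInformation si,
            OrcProto.StripeStatistics ss, Writer out) throws IOException {
        // the whole stripe is read verbatim; nothing is decompressed or decoded
        byte[] buffer = new byte[(int) si.getLength()];
        FSDataInputStream in = fs.open(src);
        try {
            in.readFully(si.getOffset(), buffer, 0, buffer.length);
        } finally {
            in.close();
        }
        // append the raw stripe bytes plus its statistics to the output file
        out.appendStripe(buffer, 0, buffer.length, si, ss);
    }
}

Because the stripes are copied as-is, checkCompatibility must reject any input whose compression, buffer size, writer version, row-index stride, or schema differs from the first file; a stripe written under different settings could not be read back through the merged file's footer.
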
Also used:
- java.io.IOException
- org.apache.hadoop.hive.ql.io.orc.OrcFile
- org.apache.hadoop.hive.ql.io.orc.OrcFileKeyWrapper
- org.apache.hadoop.hive.ql.io.orc.OrcFileValueWrapper
- org.apache.hadoop.hive.ql.metadata.HiveException
- org.apache.hadoop.hive.shims.CombineHiveKey
