Use of org.apache.hadoop.hive.ql.io.orc.OrcFileValueWrapper in project hive by apache.
From the class OrcFileMergeOperator, the method processKeyValuePairs:
private void processKeyValuePairs(Object key, Object value) throws HiveException {
  String filePath = "";
  try {
    OrcFileValueWrapper v;
    OrcFileKeyWrapper k;
    // the key arrives wrapped in a CombineHiveKey when CombineHiveInputFormat is used
    if (key instanceof CombineHiveKey) {
      k = (OrcFileKeyWrapper) ((CombineHiveKey) key).getKey();
    } else {
      k = (OrcFileKeyWrapper) key;
    }
    // skip incompatible files; files that are missing stripe statistics are flagged as incompatible
    if (k.isIncompatFile()) {
      LOG.warn("Incompatible ORC file merge! Stripe statistics is missing. " + k.getInputPath());
      incompatFileSet.add(k.getInputPath());
      return;
    }
    filePath = k.getInputPath().toUri().getPath();
    fixTmpPath(k.getInputPath().getParent());
    v = (OrcFileValueWrapper) value;

    if (prevPath == null) {
      prevPath = k.getInputPath();
      reader = OrcFile.createReader(fs, k.getInputPath());
      if (isLogInfoEnabled) {
        LOG.info("ORC merge file input path: " + k.getInputPath());
      }
    }
    // these settings must match across all input files; files that do not match are not merged
    if (outWriter == null) {
      compression = k.getCompression();
      compressBuffSize = k.getCompressBufferSize();
      version = k.getVersion();
      columnCount = k.getTypes().get(0).getSubtypesCount();
      rowIndexStride = k.getRowIndexStride();

      OrcFile.WriterOptions options = OrcFile.writerOptions(jc)
          .compress(compression)
          .version(version)
          .rowIndexStride(rowIndexStride)
          .inspector(reader.getObjectInspector());
      // the compression buffer size should only be set if compression is enabled
      if (compression != CompressionKind.NONE) {
        // enforceBufferSize is required to retain the buffer size of the old files
        // instead of letting the ORC writer infer an optimal buffer size
        options.bufferSize(compressBuffSize).enforceBufferSize();
      }

      outWriter = OrcFile.createWriter(outPath, options);
      if (isLogDebugEnabled) {
        LOG.info("ORC merge file output path: " + outPath);
      }
    }
    if (!checkCompatibility(k)) {
      incompatFileSet.add(k.getInputPath());
      return;
    }

    // a new input file; open a reader for it
    if (!k.getInputPath().equals(prevPath)) {
      reader = OrcFile.createReader(fs, k.getInputPath());
    }
    // read the entire stripe into a buffer
    byte[] buffer = new byte[(int) v.getStripeInformation().getLength()];
    fdis = fs.open(k.getInputPath());
    fdis.readFully(v.getStripeInformation().getOffset(), buffer, 0,
        (int) v.getStripeInformation().getLength());

    // append the raw stripe bytes to the new ORC file
    outWriter.appendStripe(buffer, 0, buffer.length, v.getStripeInformation(),
        v.getStripeStatistics());
    if (isLogInfoEnabled) {
      LOG.info("Merged stripe from file " + k.getInputPath()
          + " [ offset : " + v.getStripeInformation().getOffset()
          + " length: " + v.getStripeInformation().getLength()
          + " row: " + v.getStripeStatistics().getColStats(0).getNumberOfValues() + " ]");
    }

    // if this is the last stripe of the input file, copy any user metadata into the footer
    if (v.isLastStripeInFile()) {
      outWriter.appendUserMetadata(v.getUserMetadata());
    }
  } catch (Throwable e) {
    this.exception = true;
    LOG.error("Closing operator..Exception: " + ExceptionUtils.getStackTrace(e));
    throw new HiveException(e);
  } finally {
    if (exception) {
      closeOp(true);
    }
    if (fdis != null) {
      try {
        fdis.close();
      } catch (IOException e) {
        throw new HiveException(String.format("Unable to close file %s", filePath), e);
      } finally {
        fdis = null;
      }
    }
  }
}
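For context, the core of the merge is the raw stripe copy above: each stripe's bytes are read verbatim from the source file and appended to the output writer without decoding or re-encoding any rows. Below is a minimal standalone sketch of just that step, not part of Hive; the class OrcStripeCopy and the method copyStripe are hypothetical names introduced for illustration, and the imports assume the Hive 1.x package layout used in the snippet above, where Writer, StripeInformation, and OrcProto live under org.apache.hadoop.hive.ql.io.orc.

import java.io.IOException;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.ql.io.orc.OrcProto;
import org.apache.hadoop.hive.ql.io.orc.StripeInformation;
import org.apache.hadoop.hive.ql.io.orc.Writer;

// Hypothetical helper, not part of Hive: shows the raw stripe copy in isolation.
final class OrcStripeCopy {
  static void copyStripe(FileSystem fs, Path inputPath, Writer outWriter,
      StripeInformation stripe, OrcProto.StripeStatistics stats) throws IOException {
    // read the stripe bytes exactly as they are laid out in the source file
    byte[] buffer = new byte[(int) stripe.getLength()];
    try (FSDataInputStream in = fs.open(inputPath)) {
      in.readFully(stripe.getOffset(), buffer, 0, buffer.length);
    }
    // hand the raw bytes, the stripe metadata, and the stripe statistics to the
    // writer of the merged file; no rows are decoded or re-encoded
    outWriter.appendStripe(buffer, 0, buffer.length, stripe, stats);
  }
}

Because only byte buffers and footer metadata are moved, the merge is cheap; this is also why the compression kind, buffer size, writer version, schema, and row index stride of every input file must match exactly, which is what checkCompatibility(k) enforces in the operator above.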