Search in sources:

Example 1 with CombineHiveKey

Use of org.apache.hadoop.hive.shims.CombineHiveKey in project hive by apache.

In class TestInputOutputFormat, method testCombinationInputFormat.

// test non-vectorized, non-acid, combine
@Test
public void testCombinationInputFormat() throws Exception {
    // get the object inspector for MyRow
    StructObjectInspector inspector;
    synchronized (TestOrcFile.class) {
        inspector = (StructObjectInspector) ObjectInspectorFactory.getReflectionObjectInspector(MyRow.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
    }
    JobConf conf = createMockExecutionEnvironment(workDir, new Path("mock:///"), "combination", inspector, false, 1);
    // write the orc file to the mock file system
    Path partDir = new Path(conf.get("mapred.input.dir"));
    Writer writer = OrcFile.createWriter(new Path(partDir, "0_0"), OrcFile.writerOptions(conf).blockPadding(false).bufferSize(1024).inspector(inspector));
    for (int i = 0; i < 10; ++i) {
        writer.addRow(new MyRow(i, 2 * i));
    }
    writer.close();
    Path path = new Path("mock:/combination/p=0/0_0");
    setBlocks(path, conf, new MockBlock("host0", "host1"));
    MockFileSystem mockFs = (MockFileSystem) partDir.getFileSystem(conf);
    int length0 = getLength(path, conf);
    writer = OrcFile.createWriter(new Path(partDir, "1_0"), OrcFile.writerOptions(conf).blockPadding(false).bufferSize(1024).inspector(inspector));
    for (int i = 10; i < 20; ++i) {
        writer.addRow(new MyRow(i, 2 * i));
    }
    writer.close();
    Path path1 = new Path("mock:/combination/p=0/1_0");
    setBlocks(path1, conf, new MockBlock("host1", "host2"));
    // call getSplits
    HiveInputFormat<?, ?> inputFormat = new CombineHiveInputFormat<WritableComparable, Writable>();
    InputSplit[] splits = inputFormat.getSplits(conf, 1);
    assertEquals(1, splits.length);
    CombineHiveInputFormat.CombineHiveInputSplit split = (CombineHiveInputFormat.CombineHiveInputSplit) splits[0];
    // check split
    assertEquals(2, split.getNumPaths());
    assertEquals(partDir.toString() + "/0_0", split.getPath(0).toString());
    assertEquals(partDir.toString() + "/1_0", split.getPath(1).toString());
    assertEquals(length0, split.getLength(0));
    assertEquals(getLength(path1, conf), split.getLength(1));
    assertEquals(0, split.getOffset(0));
    assertEquals(0, split.getOffset(1));
    // hadoop-1 gets 3 and hadoop-2 gets 0. *sigh*
    // best answer would be 1.
    assertTrue(3 >= split.getLocations().length);
    // read split
    org.apache.hadoop.mapred.RecordReader<CombineHiveKey, OrcStruct> reader = inputFormat.getRecordReader(split, conf, Reporter.NULL);
    CombineHiveKey key = reader.createKey();
    OrcStruct value = reader.createValue();
    for (int i = 0; i < 20; i++) {
        assertTrue(reader.next(key, value));
        assertEquals(i, ((IntWritable) value.getFieldValue(0)).get());
    }
    assertFalse(reader.next(key, value));
}
Also used: Path (org.apache.hadoop.fs.Path), CombineHiveInputFormat (org.apache.hadoop.hive.ql.io.CombineHiveInputFormat), CombineHiveKey (org.apache.hadoop.hive.shims.CombineHiveKey), JobConf (org.apache.hadoop.mapred.JobConf), InputSplit (org.apache.hadoop.mapred.InputSplit), RecordWriter (org.apache.hadoop.mapred.RecordWriter), StructObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector), Test (org.junit.Test)
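
Every mapper-side usage in the examples that follow relies on the same unwrapping idiom: when CombineHiveInputFormat feeds the task, the incoming key is a CombineHiveKey wrapping the underlying file format's key, and the code must call getKey() before casting. A minimal sketch of that idiom; the unwrapKey helper and its enclosing class are illustrative, not part of Hive:

import org.apache.hadoop.hive.shims.CombineHiveKey;

public final class KeyUnwrapSketch {

    // Illustrative helper: return the format-specific key whether or not
    // the record arrived through CombineHiveInputFormat.
    static <K> K unwrapKey(Object k, Class<K> keyType) {
        Object raw = (k instanceof CombineHiveKey) ? ((CombineHiveKey) k).getKey() : k;
        return keyType.cast(raw);
    }
}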

Example 2 with CombineHiveKey

Use of org.apache.hadoop.hive.shims.CombineHiveKey in project hive by apache.

In class ColumnTruncateMapper, method map.

@Override
public void map(Object k, RCFileValueBufferWrapper value, OutputCollector<Object, Object> output, Reporter reporter) throws IOException {
    try {
        RCFileKeyBufferWrapper key = null;
        if (k instanceof CombineHiveKey) {
            key = (RCFileKeyBufferWrapper) ((CombineHiveKey) k).getKey();
        } else {
            key = (RCFileKeyBufferWrapper) k;
        }
        if (work.getListBucketingCtx().calculateListBucketingLevel() > 0) {
            if (!this.tmpPathFixedConcatenate) {
                fixTmpPathConcatenate(key.getInputPath().getParent(), work.getListBucketingCtx().calculateListBucketingLevel());
                tmpPathFixedConcatenate = true;
            }
        }
        if (outWriter == null) {
            codec = key.getCodec();
            columnNumber = key.getKeyBuffer().getColumnNumber();
            RCFileOutputFormat.setColumnNumber(jc, columnNumber);
            outWriter = new RCFile.Writer(fs, jc, outPath, null, codec);
        }
        for (Integer i : work.getDroppedColumns()) {
            key.getKeyBuffer().nullColumn(i);
            value.getValueBuffer().nullColumn(i);
        }
        int keyLength = key.getKeyBuffer().getSize();
        // the record spans the key buffer plus every column's value bytes
        int recordLength = key.getKeyBuffer().getSize();
        for (int columnLen : key.getKeyBuffer().getEachColumnValueLen()) {
            recordLength += columnLen;
        }
        outWriter.flushBlock(key.getKeyBuffer(), value.getValueBuffer(), recordLength, keyLength, key.getCompressedKeyLength());
    } catch (Throwable e) {
        this.exception = true;
        close();
        throw new IOException(e);
    }
}
Also used: RCFile (org.apache.hadoop.hive.ql.io.RCFile), CombineHiveKey (org.apache.hadoop.hive.shims.CombineHiveKey), RCFileKeyBufferWrapper (org.apache.hadoop.hive.ql.io.rcfile.merge.RCFileKeyBufferWrapper), IOException (java.io.IOException)
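
The recordLength handed to flushBlock above is the key buffer size plus the sum of the per-column value lengths. A standalone illustration of that arithmetic, using made-up lengths:

// Made-up numbers, mirroring the computation in map() above.
int keyLength = 128;                        // serialized key buffer size
int[] eachColumnValueLen = {200, 300, 50};  // per-column value byte counts
int recordLength = keyLength;               // record = key + all column values
for (int columnLen : eachColumnValueLen) {
    recordLength += columnLen;
}
// recordLength is now 678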

Example 3 with CombineHiveKey

Use of org.apache.hadoop.hive.shims.CombineHiveKey in project hive by apache.

In class RCFileMergeOperator, method processKeyValuePairs.

private void processKeyValuePairs(Object k, Object v) throws HiveException {
    try {
        RCFileKeyBufferWrapper key;
        if (k instanceof CombineHiveKey) {
            key = (RCFileKeyBufferWrapper) ((CombineHiveKey) k).getKey();
        } else {
            key = (RCFileKeyBufferWrapper) k;
        }
        RCFileValueBufferWrapper value = (RCFileValueBufferWrapper) v;
        fixTmpPath(key.getInputPath().getParent());
        if (outWriter == null) {
            codec = key.getCodec();
            columnNumber = key.getKeyBuffer().getColumnNumber();
            RCFileOutputFormat.setColumnNumber(jc, columnNumber);
            outWriter = new RCFile.Writer(fs, jc, outPath, null, codec);
        }
        boolean sameCodec = ((codec == key.getCodec()) || codec.getClass().equals(key.getCodec().getClass()));
        if ((key.getKeyBuffer().getColumnNumber() != columnNumber) || (!sameCodec)) {
            throw new IOException("RCFileMerge failed because the input files" + " use different CompressionCodec or have different column number" + " setting.");
        }
        outWriter.flushBlock(key.getKeyBuffer(), value.getValueBuffer(), key.getRecordLength(), key.getKeyLength(), key.getCompressedKeyLength());
    } catch (Throwable e) {
        this.exception = true;
        closeOp(true);
        throw new HiveException(e);
    }
}
Also used: RCFile (org.apache.hadoop.hive.ql.io.RCFile), HiveException (org.apache.hadoop.hive.ql.metadata.HiveException), CombineHiveKey (org.apache.hadoop.hive.shims.CombineHiveKey), RCFileKeyBufferWrapper (org.apache.hadoop.hive.ql.io.rcfile.merge.RCFileKeyBufferWrapper), RCFileValueBufferWrapper (org.apache.hadoop.hive.ql.io.rcfile.merge.RCFileValueBufferWrapper), IOException (java.io.IOException)
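
The merge copies compressed blocks verbatim via flushBlock, which is why the operator insists on a matching codec and column count. A hedged restatement of that check as a standalone helper; isCompatible is hypothetical, not a Hive method:

import org.apache.hadoop.io.compress.CompressionCodec;

// Hypothetical helper mirroring the check in processKeyValuePairs: blocks are
// copied without re-encoding, so codec and column count must agree.
static boolean isCompatible(CompressionCodec expected, int expectedColumns,
                            CompressionCodec actual, int actualColumns) {
    boolean sameCodec = (expected == actual)
            || (expected != null && actual != null
                && expected.getClass().equals(actual.getClass()));
    return sameCodec && expectedColumns == actualColumns;
}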

Example 4 with CombineHiveKey

Use of org.apache.hadoop.hive.shims.CombineHiveKey in project hive by apache.

In class OrcFileMergeOperator, method processKeyValuePairs.

private void processKeyValuePairs(Object key, Object value) throws HiveException {
    String filePath = "";
    try {
        OrcFileValueWrapper v;
        OrcFileKeyWrapper k;
        if (key instanceof CombineHiveKey) {
            k = (OrcFileKeyWrapper) ((CombineHiveKey) key).getKey();
        } else {
            k = (OrcFileKeyWrapper) key;
        }
        // skip incompatible files; files missing stripe statistics are marked incompatible
        if (k.isIncompatFile()) {
            LOG.warn("Incompatible ORC file merge! Stripe statistics is missing. " + k.getInputPath());
            incompatFileSet.add(k.getInputPath());
            return;
        }
        filePath = k.getInputPath().toUri().getPath();
        fixTmpPath(k.getInputPath().getParent());
        v = (OrcFileValueWrapper) value;
        if (prevPath == null) {
            prevPath = k.getInputPath();
            reader = OrcFile.createReader(fs, k.getInputPath());
            if (isLogInfoEnabled) {
                LOG.info("ORC merge file input path: " + k.getInputPath());
            }
        }
        // capture the first file's writer settings; subsequent files must match them to be merged
        if (outWriter == null) {
            compression = k.getCompression();
            compressBuffSize = k.getCompressBufferSize();
            version = k.getVersion();
            columnCount = k.getTypes().get(0).getSubtypesCount();
            rowIndexStride = k.getRowIndexStride();
            OrcFile.WriterOptions options = OrcFile.writerOptions(jc).compress(compression).version(version).rowIndexStride(rowIndexStride).inspector(reader.getObjectInspector());
            // compression buffer size should only be set if compression is enabled
            if (compression != CompressionKind.NONE) {
                // enforceBufferSize retains the old files' buffer sizes rather
                // than letting the ORC writer infer an optimal size
                options.bufferSize(compressBuffSize).enforceBufferSize();
            }
            outWriter = OrcFile.createWriter(outPath, options);
            if (isLogDebugEnabled) {
                LOG.debug("ORC merge file output path: " + outPath);
            }
        }
        if (!checkCompatibility(k)) {
            incompatFileSet.add(k.getInputPath());
            return;
        }
        // next file in the path
        if (!k.getInputPath().equals(prevPath)) {
            reader = OrcFile.createReader(fs, k.getInputPath());
        }
        // initialize buffer to read the entire stripe
        byte[] buffer = new byte[(int) v.getStripeInformation().getLength()];
        fdis = fs.open(k.getInputPath());
        fdis.readFully(v.getStripeInformation().getOffset(), buffer, 0, (int) v.getStripeInformation().getLength());
        // append the stripe buffer to the new ORC file
        outWriter.appendStripe(buffer, 0, buffer.length, v.getStripeInformation(), v.getStripeStatistics());
        if (isLogInfoEnabled) {
            LOG.info("Merged stripe from file " + k.getInputPath() + " [ offset : " + v.getStripeInformation().getOffset() + " length: " + v.getStripeInformation().getLength() + " row: " + v.getStripeStatistics().getColStats(0).getNumberOfValues() + " ]");
        }
        // add user metadata to the footer, if any
        if (v.isLastStripeInFile()) {
            outWriter.appendUserMetadata(v.getUserMetadata());
        }
    } catch (Throwable e) {
        this.exception = true;
        LOG.error("Closing operator..Exception: " + ExceptionUtils.getStackTrace(e));
        throw new HiveException(e);
    } finally {
        if (exception) {
            closeOp(true);
        }
        if (fdis != null) {
            try {
                fdis.close();
            } catch (IOException e) {
                throw new HiveException(String.format("Unable to close file %s", filePath), e);
            } finally {
                fdis = null;
            }
        }
    }
}
Also used: HiveException (org.apache.hadoop.hive.ql.metadata.HiveException), CombineHiveKey (org.apache.hadoop.hive.shims.CombineHiveKey), OrcFile (org.apache.hadoop.hive.ql.io.orc.OrcFile), IOException (java.io.IOException), OrcFileKeyWrapper (org.apache.hadoop.hive.ql.io.orc.OrcFileKeyWrapper), OrcFileValueWrapper (org.apache.hadoop.hive.ql.io.orc.OrcFileValueWrapper)
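
The fast merge works at stripe granularity: it reads a stripe's raw bytes and appends them with appendStripe, so the data is never decoded or re-encoded. Judging by the writer settings the operator captures from the first file, checkCompatibility plausibly compares those same settings; the sketch below is a hypothetical reconstruction, not the actual Hive implementation:

// Hypothetical sketch: stripes can only be byte-copied between files that
// agree on the writer settings captured when outWriter was created.
private boolean checkCompatibility(OrcFileKeyWrapper k) {
    return compression == k.getCompression()
            && compressBuffSize == k.getCompressBufferSize()
            && version.equals(k.getVersion())
            && columnCount == k.getTypes().get(0).getSubtypesCount()
            && rowIndexStride == k.getRowIndexStride();
}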

Example 5 with CombineHiveKey

Use of org.apache.hadoop.hive.shims.CombineHiveKey in project hive by apache.

In class PartialScanMapper, method map.

@Override
public void map(Object k, RCFileValueBufferWrapper value, OutputCollector<Object, Object> output, Reporter reporter) throws IOException {
    if (rp == null) {
        this.rp = reporter;
        MapredContext.get().setReporter(reporter);
    }
    try {
        // CombineHiveInputFormat may be set in PartialScanTask.
        RCFileKeyBufferWrapper key = (RCFileKeyBufferWrapper) ((k instanceof CombineHiveKey) ? ((CombineHiveKey) k).getKey() : k);
        // calculate raw data size
        KeyBuffer keyBuffer = key.getKeyBuffer();
        long[] uncompressedColumnSizes = new long[keyBuffer.getColumnNumber()];
        for (int i = 0; i < keyBuffer.getColumnNumber(); i++) {
            uncompressedColumnSizes[i] = keyBuffer.getEachColumnUncompressedValueLen()[i];
        }
        // sum the per-column uncompressed lengths into the running file size
        for (long columnSize : uncompressedColumnSizes) {
            uncompressedFileSize += columnSize;
        }
        // calculate no. of rows
        rowNo += keyBuffer.getNumberRows();
    } catch (Throwable e) {
        this.exception = true;
        close();
        throw new IOException(e);
    }
}
Also used: CombineHiveKey (org.apache.hadoop.hive.shims.CombineHiveKey), RCFileKeyBufferWrapper (org.apache.hadoop.hive.ql.io.rcfile.merge.RCFileKeyBufferWrapper), KeyBuffer (org.apache.hadoop.hive.ql.io.RCFile.KeyBuffer), IOException (java.io.IOException)
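
Across many map() calls the mapper accumulates uncompressedFileSize and rowNo purely from key buffer metadata, never touching the column data itself. An illustration with made-up numbers:

// Made-up numbers: two key buffers, each describing two columns.
long uncompressedFileSize = 0L;
long rowNo = 0L;
long[][] perBufferColumnLens = {{400L, 600L}, {350L, 650L}};
long[] perBufferRows = {100L, 90L};
for (int b = 0; b < perBufferColumnLens.length; b++) {
    for (long len : perBufferColumnLens[b]) {
        uncompressedFileSize += len;  // sum uncompressed column lengths
    }
    rowNo += perBufferRows[b];        // sum row counts
}
// uncompressedFileSize is 2000, rowNo is 190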

Aggregations

CombineHiveKey (org.apache.hadoop.hive.shims.CombineHiveKey): 5 usages
IOException (java.io.IOException): 4 usages
RCFileKeyBufferWrapper (org.apache.hadoop.hive.ql.io.rcfile.merge.RCFileKeyBufferWrapper): 3 usages
RCFile (org.apache.hadoop.hive.ql.io.RCFile): 2 usages
HiveException (org.apache.hadoop.hive.ql.metadata.HiveException): 2 usages
Path (org.apache.hadoop.fs.Path): 1 usage
CombineHiveInputFormat (org.apache.hadoop.hive.ql.io.CombineHiveInputFormat): 1 usage
KeyBuffer (org.apache.hadoop.hive.ql.io.RCFile.KeyBuffer): 1 usage
OrcFile (org.apache.hadoop.hive.ql.io.orc.OrcFile): 1 usage
OrcFileKeyWrapper (org.apache.hadoop.hive.ql.io.orc.OrcFileKeyWrapper): 1 usage
OrcFileValueWrapper (org.apache.hadoop.hive.ql.io.orc.OrcFileValueWrapper): 1 usage
RCFileValueBufferWrapper (org.apache.hadoop.hive.ql.io.rcfile.merge.RCFileValueBufferWrapper): 1 usage
StructObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector): 1 usage
InputSplit (org.apache.hadoop.mapred.InputSplit): 1 usage
JobConf (org.apache.hadoop.mapred.JobConf): 1 usage
RecordWriter (org.apache.hadoop.mapred.RecordWriter): 1 usage
Test (org.junit.Test): 1 usage