
Example 61 with Writable

Use of org.apache.hadoop.io.Writable in project hive by apache.

The class AppMasterEventOperator, method process.

@Override
public void process(Object row, int tag) throws HiveException {
    if (hasReachedMaxSize) {
        return;
    }
    ObjectInspector rowInspector = inputObjInspectors[0];
    try {
        Writable writableRow = serializer.serialize(row, rowInspector);
        writableRow.write(buffer);
        if (buffer.getLength() > MAX_SIZE) {
            if (isLogInfoEnabled) {
                LOG.info("Disabling AM events. Buffer size too large: " + buffer.getLength());
            }
            hasReachedMaxSize = true;
            buffer = null;
        }
    } catch (Exception e) {
        throw new HiveException(e);
    }
    if (isLogDebugEnabled) {
        LOG.debug("AppMasterEvent: " + row);
    }
    forward(row, rowInspector);
}
Also used : ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) Writable(org.apache.hadoop.io.Writable) IOException(java.io.IOException)
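
The pattern above (write the serialized row into a buffer and stop once getLength() exceeds a cap) can be tried in isolation. A minimal sketch where a DataOutputBuffer stands in for the operator's buffer, with a placeholder MAX_SIZE and a Text payload:

import org.apache.hadoop.io.DataOutputBuffer;
import org.apache.hadoop.io.Text;

public class WritableSizeCheckSketch {

    // Placeholder cap; the operator's real MAX_SIZE is configured elsewhere.
    private static final int MAX_SIZE = 1024;

    public static void main(String[] args) throws Exception {
        DataOutputBuffer buffer = new DataOutputBuffer();
        Text row = new Text("example event payload");
        // Writable.write(DataOutput) appends the serialized form to the buffer.
        row.write(buffer);
        if (buffer.getLength() > MAX_SIZE) {
            // Same decision point as process(): stop emitting events once the cap is exceeded.
            System.out.println("Buffer size too large: " + buffer.getLength());
        }
    }
}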

Example 62 with Writable

Use of org.apache.hadoop.io.Writable in project hive by apache.

The class HiveContextAwareRecordReader, method doNext.

public boolean doNext(K key, V value) throws IOException {
    if (this.isSorted) {
        if (this.getIOContext().shouldEndBinarySearch() || (!this.getIOContext().useSorted() && this.wasUsingSortedSearch)) {
            beginLinearSearch();
            this.wasUsingSortedSearch = false;
            this.getIOContext().setEndBinarySearch(false);
        }
        if (this.getIOContext().useSorted()) {
            if (this.genericUDFClassName == null && this.getIOContext().getGenericUDFClassName() != null) {
                setGenericUDFClassName(this.getIOContext().getGenericUDFClassName());
            }
            if (this.getIOContext().isBinarySearching()) {
                // Proceed with a binary search
                if (this.getIOContext().getComparison() != null) {
                    switch(this.getIOContext().getComparison()) {
                        case GREATER:
                        case EQUAL:
                            // Indexes have only one entry per value, so we could switch to a linear
                            // scan from here; if we want to use this for any sorted table, we'll
                            // need to continue the binary search.
                            rangeEnd = previousPosition;
                            break;
                        case LESS:
                            rangeStart = previousPosition;
                            break;
                        default:
                            break;
                    }
                }
                long position = (rangeStart + rangeEnd) / 2;
                sync(position);
                long newPosition = getSyncedPosition();
                // If sync() did not move past the previous position, or we are at or beyond the
                // end of the split, any matching rows must be in the final block, so we can end
                // the binary search.
                if (newPosition == previousPosition || newPosition >= splitEnd) {
                    this.getIOContext().setBinarySearching(false);
                    sync(rangeStart);
                }
                previousPosition = newPosition;
            } else if (foundAllTargets()) {
                // Found all possible rows which will not be filtered
                return false;
            }
        }
    }
    try {
        /**
         * When starting to read a new file, check for header and footer rows.
         * If the file has a header, skip the header lines before reading the records.
         * If the file has a footer, use a FooterBuffer to drop the footer lines
         * at the end of the table file.
         */
        if (this.ioCxtRef.getCurrentBlockStart() == 0) {
            // Check if the table file has header to skip.
            footerBuffer = null;
            Path filePath = this.ioCxtRef.getInputPath();
            PartitionDesc part = null;
            try {
                if (pathToPartitionInfo == null) {
                    pathToPartitionInfo = Utilities.getMapWork(jobConf).getPathToPartitionInfo();
                }
                part = HiveFileFormatUtils.getPartitionDescFromPathRecursively(pathToPartitionInfo, filePath, IOPrepareCache.get().getPartitionDescMap());
            } catch (AssertionError ae) {
                LOG.info("Cannot get partition description from " + this.ioCxtRef.getInputPath() + "because " + ae.getMessage());
                part = null;
            } catch (Exception e) {
                LOG.info("Cannot get partition description from " + this.ioCxtRef.getInputPath() + "because " + e.getMessage());
                part = null;
            }
            TableDesc table = (part == null) ? null : part.getTableDesc();
            if (table != null) {
                headerCount = Utilities.getHeaderCount(table);
                footerCount = Utilities.getFooterCount(table, jobConf);
            }
            // If input contains header, skip header.
            if (!Utilities.skipHeader(recordReader, headerCount, (WritableComparable) key, (Writable) value)) {
                return false;
            }
            if (footerCount > 0) {
                footerBuffer = new FooterBuffer();
                if (!footerBuffer.initializeBuffer(jobConf, recordReader, footerCount, (WritableComparable) key, (Writable) value)) {
                    return false;
                }
            }
        }
        if (footerBuffer == null) {
            // Table files don't have footer rows.
            return recordReader.next(key, value);
        } else {
            return footerBuffer.updateBuffer(jobConf, recordReader, (WritableComparable) key, (Writable) value);
        }
    } catch (Exception e) {
        return HiveIOExceptionHandlerUtil.handleRecordReaderNextException(e, jobConf);
    }
}
Also used : Path(org.apache.hadoop.fs.Path) WritableComparable(org.apache.hadoop.io.WritableComparable) Writable(org.apache.hadoop.io.Writable) PartitionDesc(org.apache.hadoop.hive.ql.plan.PartitionDesc) TableDesc(org.apache.hadoop.hive.ql.plan.TableDesc) IOException(java.io.IOException) FooterBuffer(org.apache.hadoop.hive.ql.exec.FooterBuffer)
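
The header/footer handling above skips headerCount lines when a new file starts and uses a FooterBuffer to hold back the last footerCount lines so they are never returned. A Hive-independent sketch of the same idea, with a hard-coded line list standing in for the record reader (this is not the FooterBuffer API):

import java.util.ArrayDeque;
import java.util.Arrays;
import java.util.Deque;
import java.util.Iterator;
import java.util.List;

public class HeaderFooterSkipSketch {
    public static void main(String[] args) {
        // Placeholder input; in doNext() the lines come from the wrapped RecordReader.
        List<String> lines = Arrays.asList("header", "row1", "row2", "row3", "footer1", "footer2");
        int headerCount = 1;
        int footerCount = 2;

        Iterator<String> it = lines.iterator();
        // Skip the header lines before emitting records.
        for (int i = 0; i < headerCount && it.hasNext(); i++) {
            it.next();
        }
        // Keep footerCount lines buffered; a line is only emitted once a newer one
        // pushes it out, so the trailing footer lines are never emitted.
        Deque<String> buffer = new ArrayDeque<>();
        while (it.hasNext()) {
            buffer.addLast(it.next());
            if (buffer.size() > footerCount) {
                System.out.println(buffer.removeFirst()); // prints row1, row2, row3
            }
        }
    }
}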

Example 63 with Writable

Use of org.apache.hadoop.io.Writable in project hive by apache.

The class HiveIgnoreKeyTextOutputFormat, method getHiveRecordWriter.

/**
   * Create the final output file and write rows one by one. After each row is
   * appended, the configured row separator is appended.
   *
   * @param jc
   *          the job configuration
   * @param outPath
   *          the final output file to be created
   * @param valueClass
   *          the value class used to create the record writer
   * @param isCompressed
   *          whether the content is compressed or not
   * @param tableProperties
   *          the table properties of this file's corresponding table
   * @param progress
   *          progress used for status report
   * @return the RecordWriter
   */
@Override
public RecordWriter getHiveRecordWriter(JobConf jc, Path outPath, Class<? extends Writable> valueClass, boolean isCompressed, Properties tableProperties, Progressable progress) throws IOException {
    int rowSeparator = 0;
    String rowSeparatorString = tableProperties.getProperty(serdeConstants.LINE_DELIM, "\n");
    try {
        rowSeparator = Byte.parseByte(rowSeparatorString);
    } catch (NumberFormatException e) {
        rowSeparator = rowSeparatorString.charAt(0);
    }
    final int finalRowSeparator = rowSeparator;
    FileSystem fs = outPath.getFileSystem(jc);
    final OutputStream outStream = Utilities.createCompressedStream(jc, fs.create(outPath, progress), isCompressed);
    return new RecordWriter() {

        @Override
        public void write(Writable r) throws IOException {
            if (r instanceof Text) {
                Text tr = (Text) r;
                outStream.write(tr.getBytes(), 0, tr.getLength());
                outStream.write(finalRowSeparator);
            } else {
                // DynamicSerDe always writes out BytesWritable
                BytesWritable bw = (BytesWritable) r;
                outStream.write(bw.get(), 0, bw.getSize());
                outStream.write(finalRowSeparator);
            }
        }

        @Override
        public void close(boolean abort) throws IOException {
            outStream.close();
        }
    };
}
Also used : RecordWriter(org.apache.hadoop.hive.ql.exec.FileSinkOperator.RecordWriter) FileSystem(org.apache.hadoop.fs.FileSystem) OutputStream(java.io.OutputStream) Writable(org.apache.hadoop.io.Writable) BytesWritable(org.apache.hadoop.io.BytesWritable) Text(org.apache.hadoop.io.Text)
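
A hedged usage sketch for the writer above, assuming a placeholder local output path and a default job configuration; in Hive this method is normally reached through the file sink machinery rather than called by hand:

import java.util.Properties;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.ql.exec.FileSinkOperator.RecordWriter;
import org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat;
import org.apache.hadoop.hive.serde.serdeConstants;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.Reporter;

public class IgnoreKeyTextWriterSketch {
    public static void main(String[] args) throws Exception {
        JobConf jc = new JobConf();
        Properties tableProps = new Properties();
        tableProps.setProperty(serdeConstants.LINE_DELIM, "\n");
        // "/tmp/hive-ignore-key-demo.txt" is a placeholder path on the default filesystem.
        RecordWriter writer = new HiveIgnoreKeyTextOutputFormat<Text, Text>().getHiveRecordWriter(
                jc, new Path("/tmp/hive-ignore-key-demo.txt"), Text.class,
                false /* isCompressed */, tableProps, Reporter.NULL);
        writer.write(new Text("col1\tcol2"));
        writer.close(false /* abort */);
    }
}

Each write() call appends the row bytes followed by the configured separator, so the resulting file is plain delimited text with no keys.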

Example 64 with Writable

Use of org.apache.hadoop.io.Writable in project hive by apache.

The class HiveNullValueSequenceFileOutputFormat, method getHiveRecordWriter.

@Override
public RecordWriter getHiveRecordWriter(JobConf jc, Path finalOutPath, Class<? extends Writable> valueClass, boolean isCompressed, Properties tableProperties, Progressable progress) throws IOException {
    FileSystem fs = finalOutPath.getFileSystem(jc);
    final SequenceFile.Writer outStream = Utilities.createSequenceWriter(jc, fs, finalOutPath, HiveKey.class, NullWritable.class, isCompressed, progress);
    keyWritable = new HiveKey();
    keyIsText = valueClass.equals(Text.class);
    return new RecordWriter() {

        @Override
        public void write(Writable r) throws IOException {
            if (keyIsText) {
                Text text = (Text) r;
                keyWritable.set(text.getBytes(), 0, text.getLength());
            } else {
                BytesWritable bw = (BytesWritable) r;
                // Once we drop support for old Hadoop versions, change these
                // to getBytes() and getLength() to fix the deprecation warnings.
                // Not worth a shim.
                keyWritable.set(bw.get(), 0, bw.getSize());
            }
            keyWritable.setHashCode(r.hashCode());
            outStream.append(keyWritable, NULL_WRITABLE);
        }

        @Override
        public void close(boolean abort) throws IOException {
            outStream.close();
        }
    };
}
Also used : RecordWriter(org.apache.hadoop.hive.ql.exec.FileSinkOperator.RecordWriter) SequenceFile(org.apache.hadoop.io.SequenceFile) FileSystem(org.apache.hadoop.fs.FileSystem) NullWritable(org.apache.hadoop.io.NullWritable) Writable(org.apache.hadoop.io.Writable) BytesWritable(org.apache.hadoop.io.BytesWritable) Text(org.apache.hadoop.io.Text)
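
Because each row's bytes become the SequenceFile key (the value is always NullWritable), a file produced by this writer can be read back with a plain SequenceFile.Reader. A minimal sketch, assuming a placeholder path to such a file:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.ql.io.HiveKey;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.SequenceFile;

public class NullValueSequenceFileReadSketch {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // Placeholder path; point it at a file written by HiveNullValueSequenceFileOutputFormat.
        Path path = new Path("/tmp/hive-null-value-demo.seq");
        SequenceFile.Reader reader = new SequenceFile.Reader(conf, SequenceFile.Reader.file(path));
        try {
            HiveKey key = new HiveKey();
            NullWritable value = NullWritable.get();
            while (reader.next(key, value)) {
                // The row bytes live in the key; the value carries no data.
                System.out.println(key.getLength() + " bytes");
            }
        } finally {
            reader.close();
        }
    }
}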

Example 65 with Writable

Use of org.apache.hadoop.io.Writable in project hive by apache.

The class OrcOutputFormat, method getRawRecordWriter.

@Override
public org.apache.hadoop.hive.ql.exec.FileSinkOperator.RecordWriter getRawRecordWriter(Path path, Options options) throws IOException {
    final Path filename = AcidUtils.createFilename(path, options);
    final OrcFile.WriterOptions opts = OrcFile.writerOptions(options.getConfiguration());
    if (!options.isWritingBase()) {
        opts.bufferSize(OrcRecordUpdater.DELTA_BUFFER_SIZE)
            .stripeSize(OrcRecordUpdater.DELTA_STRIPE_SIZE)
            .blockPadding(false)
            .compress(CompressionKind.NONE)
            .rowIndexStride(0);
    }
    final OrcRecordUpdater.KeyIndexBuilder watcher = new OrcRecordUpdater.KeyIndexBuilder();
    opts.inspector(options.getInspector()).callback(watcher);
    final Writer writer = OrcFile.createWriter(filename, opts);
    return new org.apache.hadoop.hive.ql.exec.FileSinkOperator.RecordWriter() {

        @Override
        public void write(Writable w) throws IOException {
            OrcStruct orc = (OrcStruct) w;
            watcher.addKey(
                ((IntWritable) orc.getFieldValue(OrcRecordUpdater.OPERATION)).get(),
                ((LongWritable) orc.getFieldValue(OrcRecordUpdater.ORIGINAL_TRANSACTION)).get(),
                ((IntWritable) orc.getFieldValue(OrcRecordUpdater.BUCKET)).get(),
                ((LongWritable) orc.getFieldValue(OrcRecordUpdater.ROW_ID)).get());
            writer.addRow(w);
        }

        @Override
        public void close(boolean abort) throws IOException {
            writer.close();
        }
    };
}
Also used : Path(org.apache.hadoop.fs.Path) NullWritable(org.apache.hadoop.io.NullWritable) Writable(org.apache.hadoop.io.Writable) LongWritable(org.apache.hadoop.io.LongWritable) IntWritable(org.apache.hadoop.io.IntWritable) StatsProvidingRecordWriter(org.apache.hadoop.hive.ql.io.StatsProvidingRecordWriter) RecordWriter(org.apache.hadoop.mapred.RecordWriter)
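
For context, the delta-style WriterOptions tuning above (small buffers and stripes, no block padding, no compression, no row index) can be exercised directly against OrcFile. A hedged sketch using the older Hive ORC package names from this example, a reflection ObjectInspector, literal sizes in place of OrcRecordUpdater's constants, and a placeholder row class and path:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.ql.io.orc.CompressionKind;
import org.apache.hadoop.hive.ql.io.orc.OrcFile;
import org.apache.hadoop.hive.ql.io.orc.Writer;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory.ObjectInspectorOptions;

public class OrcDeltaStyleWriterSketch {

    // Placeholder row type; a real ACID delta file carries the ACID columns plus the row struct.
    public static class SimpleRow {
        public int id;
        public String name;
        public SimpleRow(int id, String name) { this.id = id; this.name = name; }
    }

    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        ObjectInspector inspector = ObjectInspectorFactory.getReflectionObjectInspector(
                SimpleRow.class, ObjectInspectorOptions.JAVA);
        OrcFile.WriterOptions opts = OrcFile.writerOptions(conf)
                // Literal sizes stand in for OrcRecordUpdater.DELTA_BUFFER_SIZE / DELTA_STRIPE_SIZE.
                .bufferSize(32 * 1024)
                .stripeSize(8L * 1024 * 1024)
                .blockPadding(false)
                .compress(CompressionKind.NONE)
                .rowIndexStride(0)
                .inspector(inspector);
        // "/tmp/orc-delta-demo.orc" is a placeholder path.
        Writer writer = OrcFile.createWriter(new Path("/tmp/orc-delta-demo.orc"), opts);
        writer.addRow(new SimpleRow(1, "a"));
        writer.close();
    }
}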

Aggregations

Writable (org.apache.hadoop.io.Writable) 221
IntWritable (org.apache.hadoop.io.IntWritable) 103
LongWritable (org.apache.hadoop.io.LongWritable) 91
BooleanWritable (org.apache.hadoop.io.BooleanWritable) 75
BytesWritable (org.apache.hadoop.io.BytesWritable) 74
FloatWritable (org.apache.hadoop.io.FloatWritable) 73
Test (org.junit.Test) 68
IOException (java.io.IOException) 43
Path (org.apache.hadoop.fs.Path) 43
Text (org.apache.hadoop.io.Text) 40
ArrayWritable (org.apache.hadoop.io.ArrayWritable) 37
ShortWritable (org.apache.hadoop.hive.serde2.io.ShortWritable) 34
SequenceFile (org.apache.hadoop.io.SequenceFile) 32
Configuration (org.apache.hadoop.conf.Configuration) 31
DoubleWritable (org.apache.hadoop.io.DoubleWritable) 30
DoubleWritable (org.apache.hadoop.hive.serde2.io.DoubleWritable) 29
ByteWritable (org.apache.hadoop.io.ByteWritable) 28
ByteWritable (org.apache.hadoop.hive.serde2.io.ByteWritable) 25
FileSystem (org.apache.hadoop.fs.FileSystem) 24
ArrayList (java.util.ArrayList) 23