
Example 6 with WritableComparable

Use of org.apache.hadoop.io.WritableComparable in project hive by apache.

The class FileOutputFormatContainer, method getRecordWriter.

@Override
public RecordWriter<WritableComparable<?>, HCatRecord> getRecordWriter(TaskAttemptContext context) throws IOException, InterruptedException {
    // This needs to be set manually; under normal circumstances the MR Task does it.
    setWorkOutputPath(context);
    // Configure the output key and value classes.
    // This is required for writing null as the key for file-based tables.
    context.getConfiguration().set("mapred.output.key.class", NullWritable.class.getName());
    String jobInfoString = context.getConfiguration().get(HCatConstants.HCAT_KEY_OUTPUT_INFO);
    OutputJobInfo jobInfo = (OutputJobInfo) HCatUtil.deserialize(jobInfoString);
    StorerInfo storeInfo = jobInfo.getTableInfo().getStorerInfo();
    HiveStorageHandler storageHandler = HCatUtil.getStorageHandler(context.getConfiguration(), storeInfo);
    Class<? extends AbstractSerDe> serde = storageHandler.getSerDeClass();
    AbstractSerDe sd = (AbstractSerDe) ReflectionUtils.newInstance(serde, context.getConfiguration());
    context.getConfiguration().set("mapred.output.value.class", sd.getSerializedClass().getName());
    RecordWriter<WritableComparable<?>, HCatRecord> rw;
    if (HCatBaseOutputFormat.getJobInfo(context.getConfiguration()).isDynamicPartitioningUsed()) {
        // When dynamic partitioning is used, the RecordWriter instance initialized here isn't used, so null is fine.
        // (Records can't be written until the values of the dynamic partitions are deduced;
        // by that time, a new local instance of RecordWriter, with the correct output path, will be constructed.)
        rw = new DynamicPartitionFileRecordWriterContainer((org.apache.hadoop.mapred.RecordWriter) null, context);
    } else {
        Path parentDir = new Path(context.getConfiguration().get("mapred.work.output.dir"));
        Path childPath = new Path(parentDir, FileOutputFormat.getUniqueName(new JobConf(context.getConfiguration()), context.getConfiguration().get("mapreduce.output.basename", "part")));
        rw = new StaticPartitionFileRecordWriterContainer(getBaseOutputFormat().getRecordWriter(parentDir.getFileSystem(context.getConfiguration()), new JobConf(context.getConfiguration()), childPath.toString(), InternalUtil.createReporter(context)), context);
    }
    return rw;
}
Also used : Path(org.apache.hadoop.fs.Path) HiveStorageHandler(org.apache.hadoop.hive.ql.metadata.HiveStorageHandler) NullWritable(org.apache.hadoop.io.NullWritable) AbstractSerDe(org.apache.hadoop.hive.serde2.AbstractSerDe) RecordWriter(org.apache.hadoop.mapreduce.RecordWriter) WritableComparable(org.apache.hadoop.io.WritableComparable) JobConf(org.apache.hadoop.mapred.JobConf) HCatRecord(org.apache.hive.hcatalog.data.HCatRecord)
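
All of these examples consume WritableComparable keys, so for context, here is a minimal sketch of what implementing one looks like. The class name TimestampKey and its single field are hypothetical, not taken from the Hive code above.

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import org.apache.hadoop.io.WritableComparable;

// Hypothetical key type: serializes one long and orders keys by it.
public class TimestampKey implements WritableComparable<TimestampKey> {

    private long timestamp;

    // Hadoop needs a no-arg constructor to deserialize instances.
    public TimestampKey() {
    }

    public TimestampKey(long timestamp) {
        this.timestamp = timestamp;
    }

    @Override
    public void write(DataOutput out) throws IOException {
        out.writeLong(timestamp);
    }

    @Override
    public void readFields(DataInput in) throws IOException {
        timestamp = in.readLong();
    }

    @Override
    public int compareTo(TimestampKey other) {
        return Long.compare(timestamp, other.timestamp);
    }

    @Override
    public boolean equals(Object o) {
        return o instanceof TimestampKey && ((TimestampKey) o).timestamp == timestamp;
    }

    @Override
    public int hashCode() {
        // Keep hash partitioning consistent with equals().
        return Long.hashCode(timestamp);
    }
}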

Example 7 with WritableComparable

Use of org.apache.hadoop.io.WritableComparable in project hive by apache.

The class HiveContextAwareRecordReader, method doNext.

public boolean doNext(K key, V value) throws IOException {
    if (this.isSorted) {
        if (this.getIOContext().shouldEndBinarySearch() || (!this.getIOContext().useSorted() && this.wasUsingSortedSearch)) {
            beginLinearSearch();
            this.wasUsingSortedSearch = false;
            this.getIOContext().setEndBinarySearch(false);
        }
        if (this.getIOContext().useSorted()) {
            if (this.genericUDFClassName == null && this.getIOContext().getGenericUDFClassName() != null) {
                setGenericUDFClassName(this.getIOContext().getGenericUDFClassName());
            }
            if (this.getIOContext().isBinarySearching()) {
                // Proceed with a binary search
                if (this.getIOContext().getComparison() != null) {
                    switch(this.getIOContext().getComparison()) {
                        case GREATER:
                        case EQUAL:
                            // Indexes have only one entry per value, so we could switch to a
                            // linear scan from here; to use this for any sorted table, we
                            // need to continue the search.
                            rangeEnd = previousPosition;
                            break;
                        case LESS:
                            rangeStart = previousPosition;
                            break;
                        default:
                            break;
                    }
                }
                long position = (rangeStart + rangeEnd) / 2;
                sync(position);
                long newPosition = getSyncedPosition();
                // If we have stopped advancing (newPosition == previousPosition) or moved
                // past the end of the split, any matching rows must be in the final block,
                // so we can end the binary search and fall back to scanning from rangeStart.
                if (newPosition == previousPosition || newPosition >= splitEnd) {
                    this.getIOContext().setBinarySearching(false);
                    sync(rangeStart);
                }
                previousPosition = newPosition;
            } else if (foundAllTargets()) {
                // Found all possible rows which will not be filtered
                return false;
            }
        }
    }
    try {
        /**
         * When starting to read a new file, check for header and footer rows.
         * If the file contains a header, skip the header lines before reading the records.
         * If the file contains a footer, use a FooterBuffer to remove the footer lines
         * at the end of the table file.
         */
        if (this.ioCxtRef.getCurrentBlockStart() == 0) {
            // Check if the table file has a header to skip.
            footerBuffer = null;
            Path filePath = this.ioCxtRef.getInputPath();
            PartitionDesc part = null;
            try {
                if (pathToPartitionInfo == null) {
                    pathToPartitionInfo = Utilities.getMapWork(jobConf).getPathToPartitionInfo();
                }
                part = HiveFileFormatUtils.getFromPathRecursively(pathToPartitionInfo, filePath, IOPrepareCache.get().getPartitionDescMap());
            } catch (AssertionError ae) {
                LOG.info("Cannot get partition description from " + this.ioCxtRef.getInputPath() + " because " + ae.getMessage());
                part = null;
            } catch (Exception e) {
                LOG.info("Cannot get partition description from " + this.ioCxtRef.getInputPath() + " because " + e.getMessage());
                part = null;
            }
            TableDesc table = (part == null) ? null : part.getTableDesc();
            if (table != null) {
                headerCount = Utilities.getHeaderCount(table);
                footerCount = Utilities.getFooterCount(table, jobConf);
            }
            // If the input contains a header, skip it.
            if (!Utilities.skipHeader(recordReader, headerCount, (WritableComparable) key, (Writable) value)) {
                return false;
            }
            if (footerCount > 0) {
                footerBuffer = new FooterBuffer();
                if (!footerBuffer.initializeBuffer(jobConf, recordReader, footerCount, (WritableComparable) key, (Writable) value)) {
                    return false;
                }
            }
        }
        if (footerBuffer == null) {
            // Table files don't have footer rows.
            return recordReader.next(key, value);
        } else {
            return footerBuffer.updateBuffer(jobConf, recordReader, (WritableComparable) key, (Writable) value);
        }
    } catch (Exception e) {
        return HiveIOExceptionHandlerUtil.handleRecordReaderNextException(e, jobConf);
    }
}
Also used : Path(org.apache.hadoop.fs.Path) WritableComparable(org.apache.hadoop.io.WritableComparable) Writable(org.apache.hadoop.io.Writable) PartitionDesc(org.apache.hadoop.hive.ql.plan.PartitionDesc) TableDesc(org.apache.hadoop.hive.ql.plan.TableDesc) IOException(java.io.IOException) FooterBuffer(org.apache.hadoop.hive.ql.exec.FooterBuffer)
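
The header/footer handling above boils down to a simple pattern: when a split starts at byte 0 of a file, consume the first headerCount records before returning real rows. A minimal sketch of that pattern, assuming the old-API RecordReader; skipHeaderLines is a hypothetical helper, not Hive's Utilities.skipHeader:

import java.io.IOException;
import org.apache.hadoop.mapred.RecordReader;

public final class HeaderSkipper {

    private HeaderSkipper() {
    }

    // Consume headerCount records up front; returns false if the file runs
    // out of records first (i.e. the file is nothing but header).
    public static <K, V> boolean skipHeaderLines(RecordReader<K, V> reader, int headerCount, K key, V value) throws IOException {
        for (int i = 0; i < headerCount; i++) {
            if (!reader.next(key, value)) {
                return false;
            }
        }
        return true;
    }
}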

Example 8 with WritableComparable

Use of org.apache.hadoop.io.WritableComparable in project hive by apache.

The class RCFileOutputFormat, method getRecordWriter.

/**
 * {@inheritDoc}
 */
@Override
public RecordWriter<WritableComparable, BytesRefArrayWritable> getRecordWriter(FileSystem ignored, JobConf job, String name, Progressable progress) throws IOException {
    Path outputPath = getWorkOutputPath(job);
    FileSystem fs = outputPath.getFileSystem(job);
    Path file = new Path(outputPath, name);
    CompressionCodec codec = null;
    if (getCompressOutput(job)) {
        Class<?> codecClass = getOutputCompressorClass(job, DefaultCodec.class);
        codec = (CompressionCodec) ReflectionUtils.newInstance(codecClass, job);
    }
    final RCFile.Writer out = new RCFile.Writer(fs, job, file, progress, codec);
    return new RecordWriter<WritableComparable, BytesRefArrayWritable>() {

        @Override
        public void close(Reporter reporter) throws IOException {
            out.close();
        }

        @Override
        public void write(WritableComparable key, BytesRefArrayWritable value) throws IOException {
            // RCFile stores only the row columns; the key is ignored.
            out.append(value);
        }
    };
}
Also used : Path(org.apache.hadoop.fs.Path) RecordWriter(org.apache.hadoop.mapred.RecordWriter) BytesRefArrayWritable(org.apache.hadoop.hive.serde2.columnar.BytesRefArrayWritable) WritableComparable(org.apache.hadoop.io.WritableComparable) FileSystem(org.apache.hadoop.fs.FileSystem) Reporter(org.apache.hadoop.mapred.Reporter) CompressionCodec(org.apache.hadoop.io.compress.CompressionCodec)
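
For reference, a minimal sketch of wiring this output format into an old-API job; the column count and output path are illustrative, not from the example above:

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.ql.io.RCFileOutputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobConf;

public class RCFileJobSetup {

    public static void main(String[] args) {
        JobConf job = new JobConf();
        // RCFile is columnar, so the writer must know the column count up front.
        RCFileOutputFormat.setColumnNumber(job, 3);
        job.setOutputFormat(RCFileOutputFormat.class);
        FileOutputFormat.setOutputPath(job, new Path("/tmp/rcfile-out"));
        // Optional: compress the column data blocks.
        FileOutputFormat.setCompressOutput(job, true);
    }
}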

Example 9 with WritableComparable

Use of org.apache.hadoop.io.WritableComparable in project accumulo by apache.

The class MultiReader, method seek.

public synchronized boolean seek(WritableComparable key) throws IOException {
    PriorityBuffer reheap = new PriorityBuffer(heap.size());
    boolean result = false;
    for (Object obj : heap) {
        Index index = (Index) obj;
        try {
            WritableComparable found = index.reader.getClosest(key, index.value, true);
            if (found != null && found.equals(key)) {
                result = true;
            }
        } catch (EOFException ex) {
        // thrown if key is beyond all data in the map
        }
        index.cached = false;
        reheap.add(index);
    }
    heap = reheap;
    return result;
}
Also used : WritableComparable(org.apache.hadoop.io.WritableComparable) EOFException(java.io.EOFException) PriorityBuffer(org.apache.commons.collections.buffer.PriorityBuffer)
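
seek() above fans the same lookup out across every underlying MapFile.Reader in the heap. A minimal sketch of the single-file primitive it builds on, assuming Hadoop 2.x's MapFile.Reader(Path, Configuration) constructor; the path and key/value types are illustrative:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.MapFile;
import org.apache.hadoop.io.Text;

public class MapFileSeek {

    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        MapFile.Reader reader = new MapFile.Reader(new Path("/tmp/mapfile"), conf);
        LongWritable key = new LongWritable(42);
        Text value = new Text();
        // 'true' requests the closest entry at or before the key, matching
        // the getClosest(key, index.value, true) call in seek() above.
        LongWritable found = (LongWritable) reader.getClosest(key, value, true);
        boolean exactMatch = found != null && found.equals(key);
        System.out.println("exact match: " + exactMatch);
        reader.close();
    }
}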

Example 10 with WritableComparable

Use of org.apache.hadoop.io.WritableComparable in project Plume by tdunning.

The class MSCRCombiner, method reduce.

@SuppressWarnings("unchecked")
protected void reduce(final PlumeObject arg0, java.lang.Iterable<PlumeObject> values, Reducer<PlumeObject, PlumeObject, PlumeObject, PlumeObject>.Context context) throws IOException, InterruptedException {
    PCollection col = mscr.getChannelByNumber().get(arg0.sourceId);
    OutputChannel oC = mscr.getOutputChannels().get(col);
    if (oC.combiner != null) {
        // Apply combiner function for this channel
        List<WritableComparable> vals = Lists.newArrayList();
        for (PlumeObject val : values) {
            vals.add(val.obj);
        }
        WritableComparable result = (WritableComparable) oC.combiner.getCombiner().combine(vals);
        context.write(arg0, new PlumeObject(result, arg0.sourceId));
    } else {
        // No combiner for this channel: write all key/value pairs through directly.
        for (PlumeObject val : values) {
            context.write(arg0, val);
        }
    }
}
Also used : PCollection(com.tdunning.plume.PCollection) WritableComparable(org.apache.hadoop.io.WritableComparable) PlumeObject(com.tdunning.plume.local.lazy.MapRedExecutor.PlumeObject) OutputChannel(com.tdunning.plume.local.lazy.MSCR.OutputChannel)
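
The combiner invoked above folds an Iterable of values into a single WritableComparable. A minimal sketch of such a function, assuming IntWritable values; SumCombiner is hypothetical and not Plume's CombinerFn API:

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.WritableComparable;

// Hypothetical combiner: folds many IntWritable values into their sum.
public class SumCombiner {

    public WritableComparable combine(Iterable<WritableComparable> values) {
        int sum = 0;
        for (WritableComparable v : values) {
            sum += ((IntWritable) v).get();
        }
        return new IntWritable(sum);
    }
}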

Aggregations

WritableComparable (org.apache.hadoop.io.WritableComparable): 34
IOException (java.io.IOException): 14
Writable (org.apache.hadoop.io.Writable): 14
Path (org.apache.hadoop.fs.Path): 13
FileSystem (org.apache.hadoop.fs.FileSystem): 11
JobConf (org.apache.hadoop.mapred.JobConf): 6
CompressionCodec (org.apache.hadoop.io.compress.CompressionCodec): 5
ArrayList (java.util.ArrayList): 4
IntWritable (org.apache.hadoop.io.IntWritable): 4
NullWritable (org.apache.hadoop.io.NullWritable): 4
SequenceFile (org.apache.hadoop.io.SequenceFile): 4
TaskAttemptContext (org.apache.hadoop.mapreduce.TaskAttemptContext): 4
PCollection (com.tdunning.plume.PCollection): 3
OutputChannel (com.tdunning.plume.local.lazy.MSCR.OutputChannel): 3
PlumeObject (com.tdunning.plume.local.lazy.MapRedExecutor.PlumeObject): 3
HashMap (java.util.HashMap): 3
BytesWritable (org.apache.hadoop.io.BytesWritable): 3
FloatWritable (org.apache.hadoop.io.FloatWritable): 3
HCatRecord (org.apache.hive.hcatalog.data.HCatRecord): 3
DoFn (com.tdunning.plume.DoFn): 2