Use of org.apache.hadoop.io.WritableComparable in project hive by apache.
The class FileOutputFormatContainer, method getRecordWriter.
@Override
public RecordWriter<WritableComparable<?>, HCatRecord> getRecordWriter(TaskAttemptContext context)
    throws IOException, InterruptedException {
  // this needs to be manually set, under normal circumstances MR Task does this
  setWorkOutputPath(context);
  // Configure the output key and value classes.
  // This is required for writing null as key for file based tables.
  context.getConfiguration().set("mapred.output.key.class", NullWritable.class.getName());
  String jobInfoString = context.getConfiguration().get(HCatConstants.HCAT_KEY_OUTPUT_INFO);
  OutputJobInfo jobInfo = (OutputJobInfo) HCatUtil.deserialize(jobInfoString);
  StorerInfo storeInfo = jobInfo.getTableInfo().getStorerInfo();
  HiveStorageHandler storageHandler = HCatUtil.getStorageHandler(context.getConfiguration(), storeInfo);
  Class<? extends AbstractSerDe> serde = storageHandler.getSerDeClass();
  AbstractSerDe sd = (AbstractSerDe) ReflectionUtils.newInstance(serde, context.getConfiguration());
  context.getConfiguration().set("mapred.output.value.class", sd.getSerializedClass().getName());
  RecordWriter<WritableComparable<?>, HCatRecord> rw;
  if (HCatBaseOutputFormat.getJobInfo(context.getConfiguration()).isDynamicPartitioningUsed()) {
    // When dynamic partitioning is used, the RecordWriter instance initialized here isn't used. Can use null.
    // (That's because records can't be written until the values of the dynamic partitions are deduced.
    // By that time, a new local instance of RecordWriter, with the correct output-path, will be constructed.)
    rw = new DynamicPartitionFileRecordWriterContainer((org.apache.hadoop.mapred.RecordWriter) null, context);
  } else {
    Path parentDir = new Path(context.getConfiguration().get("mapred.work.output.dir"));
    Path childPath = new Path(parentDir,
        FileOutputFormat.getUniqueName(new JobConf(context.getConfiguration()),
            context.getConfiguration().get("mapreduce.output.basename", "part")));
    rw = new StaticPartitionFileRecordWriterContainer(
        getBaseOutputFormat().getRecordWriter(
            parentDir.getFileSystem(context.getConfiguration()),
            new JobConf(context.getConfiguration()),
            childPath.toString(),
            InternalUtil.createReporter(context)),
        context);
  }
  return rw;
}
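The writer returned above expects NullWritable keys and HCatRecord values, matching the key and value classes set on the configuration. Below is a minimal sketch of the producing side, assuming the standard HCatalog MapReduce API; the mapper class and its two-column record layout are hypothetical.

import java.io.IOException;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hive.hcatalog.data.DefaultHCatRecord;

// Hypothetical mapper: emits HCatRecords keyed by NullWritable, which is what the
// RecordWriter obtained from FileOutputFormatContainer ultimately receives.
public class WriteToHCatMapper extends Mapper<LongWritable, Text, NullWritable, DefaultHCatRecord> {

  @Override
  protected void map(LongWritable offset, Text line, Context context)
      throws IOException, InterruptedException {
    DefaultHCatRecord record = new DefaultHCatRecord(2);   // two-column table assumed
    record.set(0, offset.get());
    record.set(1, line.toString());
    context.write(NullWritable.get(), record);
  }
}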
Use of org.apache.hadoop.io.WritableComparable in project hive by apache.
The class HiveContextAwareRecordReader, method doNext.
public boolean doNext(K key, V value) throws IOException {
  if (this.isSorted) {
    if (this.getIOContext().shouldEndBinarySearch() ||
        (!this.getIOContext().useSorted() && this.wasUsingSortedSearch)) {
      beginLinearSearch();
      this.wasUsingSortedSearch = false;
      this.getIOContext().setEndBinarySearch(false);
    }
    if (this.getIOContext().useSorted()) {
      if (this.genericUDFClassName == null && this.getIOContext().getGenericUDFClassName() != null) {
        setGenericUDFClassName(this.getIOContext().getGenericUDFClassName());
      }
      if (this.getIOContext().isBinarySearching()) {
        // Proceed with a binary search
        if (this.getIOContext().getComparison() != null) {
          switch (this.getIOContext().getComparison()) {
            case GREATER:
            case EQUAL:
              // Indexes have only one entry per value; we could go linear from here, but if we want
              // to use this for any sorted table, we'll need to continue the search
              rangeEnd = previousPosition;
              break;
            case LESS:
              rangeStart = previousPosition;
              break;
            default:
              break;
          }
        }
        long position = (rangeStart + rangeEnd) / 2;
        sync(position);
        long newPosition = getSyncedPosition();
        // If the new position equals the previous one, or is at or past the end of the split, any
        // matching rows must be in the final block, so we can end the binary search.
        if (newPosition == previousPosition || newPosition >= splitEnd) {
          this.getIOContext().setBinarySearching(false);
          sync(rangeStart);
        }
        previousPosition = newPosition;
      } else if (foundAllTargets()) {
        // Found all possible rows which will not be filtered
        return false;
      }
    }
  }
  try {
    /**
     * When starting to read a new file, check for header and footer rows.
     * If the file contains a header, skip the header lines before reading the records.
     * If the file contains a footer, use a FooterBuffer to remove the footer lines
     * at the end of the table file.
     */
    if (this.ioCxtRef.getCurrentBlockStart() == 0) {
      // Check if the table file has a header to skip.
      footerBuffer = null;
      Path filePath = this.ioCxtRef.getInputPath();
      PartitionDesc part = null;
      try {
        if (pathToPartitionInfo == null) {
          pathToPartitionInfo = Utilities.getMapWork(jobConf).getPathToPartitionInfo();
        }
        part = HiveFileFormatUtils.getFromPathRecursively(
            pathToPartitionInfo, filePath, IOPrepareCache.get().getPartitionDescMap());
      } catch (AssertionError ae) {
        LOG.info("Cannot get partition description from " + this.ioCxtRef.getInputPath()
            + " because " + ae.getMessage());
        part = null;
      } catch (Exception e) {
        LOG.info("Cannot get partition description from " + this.ioCxtRef.getInputPath()
            + " because " + e.getMessage());
        part = null;
      }
      TableDesc table = (part == null) ? null : part.getTableDesc();
      if (table != null) {
        headerCount = Utilities.getHeaderCount(table);
        footerCount = Utilities.getFooterCount(table, jobConf);
      }
      // If the input contains a header, skip it.
      if (!Utilities.skipHeader(recordReader, headerCount, (WritableComparable) key, (Writable) value)) {
        return false;
      }
      if (footerCount > 0) {
        footerBuffer = new FooterBuffer();
        if (!footerBuffer.initializeBuffer(jobConf, recordReader, footerCount,
            (WritableComparable) key, (Writable) value)) {
          return false;
        }
      }
    }
    if (footerBuffer == null) {
      // Table files don't have footer rows.
      return recordReader.next(key, value);
    } else {
      return footerBuffer.updateBuffer(jobConf, recordReader, (WritableComparable) key, (Writable) value);
    }
  } catch (Exception e) {
    return HiveIOExceptionHandlerUtil.handleRecordReaderNextException(e, jobConf);
  }
}
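The sorted branch above narrows the byte range between rangeStart and rangeEnd by syncing to the block boundary nearest the midpoint until the synced position stops moving, then falls back to a linear scan of the remaining block. Below is a standalone sketch of that narrowing loop; the syncTo and targetIsAtOrBefore callbacks are stand-ins for sync()/getSyncedPosition() and the IOContext comparison, not Hive APIs.

import java.util.function.LongPredicate;
import java.util.function.LongUnaryOperator;

// Standalone sketch of the range-narrowing performed in doNext(): syncTo models
// sync(position) returning the next block boundary, and targetIsAtOrBefore models
// the GREATER/EQUAL vs. LESS comparison recorded in the IOContext.
public class BlockBinarySearchSketch {

  static long findStartBlock(long splitStart, long splitEnd,
                             LongUnaryOperator syncTo,
                             LongPredicate targetIsAtOrBefore) {
    long rangeStart = splitStart;
    long rangeEnd = splitEnd;
    long previousPosition = -1;
    while (true) {
      long midpoint = (rangeStart + rangeEnd) / 2;
      long synced = syncTo.applyAsLong(midpoint);
      if (synced == previousPosition || synced >= splitEnd) {
        // The rows of interest must live in the block at rangeStart; stop narrowing
        // and let the caller scan that block linearly.
        return rangeStart;
      }
      previousPosition = synced;
      if (targetIsAtOrBefore.test(synced)) {
        rangeEnd = synced;     // comparison was GREATER or EQUAL
      } else {
        rangeStart = synced;   // comparison was LESS
      }
    }
  }
}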
Use of org.apache.hadoop.io.WritableComparable in project hive by apache.
The class RCFileOutputFormat, method getRecordWriter.
/**
 * {@inheritDoc}
 */
@Override
public RecordWriter<WritableComparable, BytesRefArrayWritable> getRecordWriter(
    FileSystem ignored, JobConf job, String name, Progressable progress) throws IOException {
  Path outputPath = getWorkOutputPath(job);
  FileSystem fs = outputPath.getFileSystem(job);
  Path file = new Path(outputPath, name);
  CompressionCodec codec = null;
  if (getCompressOutput(job)) {
    Class<?> codecClass = getOutputCompressorClass(job, DefaultCodec.class);
    codec = (CompressionCodec) ReflectionUtils.newInstance(codecClass, job);
  }
  final RCFile.Writer out = new RCFile.Writer(fs, job, file, progress, codec);
  return new RecordWriter<WritableComparable, BytesRefArrayWritable>() {

    @Override
    public void close(Reporter reporter) throws IOException {
      out.close();
    }

    @Override
    public void write(WritableComparable key, BytesRefArrayWritable value) throws IOException {
      out.append(value);
    }
  };
}
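Note that write() ignores the key and appends only the value, so the key type does not matter in practice. Below is a hedged sketch of the job-side setup and of building one BytesRefArrayWritable row; the class name and the column count of 3 are illustrative.

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.ql.io.RCFileOutputFormat;
import org.apache.hadoop.hive.serde2.columnar.BytesRefArrayWritable;
import org.apache.hadoop.hive.serde2.columnar.BytesRefWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobConf;

// Sketch of configuring a mapred job to write RCFile output; since the key passed
// to write() is discarded, NullWritable works fine as the output key class.
public class RCFileJobSketch {

  public static JobConf configure(Path output) {
    JobConf job = new JobConf(RCFileJobSketch.class);
    RCFileOutputFormat.setColumnNumber(job, 3);       // number of columns per row (illustrative)
    job.setOutputFormat(RCFileOutputFormat.class);
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(BytesRefArrayWritable.class);
    FileOutputFormat.setOutputPath(job, output);
    return job;
  }

  // Build one row: each element holds one column's raw bytes.
  public static BytesRefArrayWritable row(byte[]... columns) {
    BytesRefArrayWritable row = new BytesRefArrayWritable(columns.length);
    for (int i = 0; i < columns.length; i++) {
      row.set(i, new BytesRefWritable(columns[i]));
    }
    return row;
  }
}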
Use of org.apache.hadoop.io.WritableComparable in project accumulo by apache.
The class MultiReader, method seek.
public synchronized boolean seek(WritableComparable key) throws IOException {
  PriorityBuffer reheap = new PriorityBuffer(heap.size());
  boolean result = false;
  for (Object obj : heap) {
    Index index = (Index) obj;
    try {
      WritableComparable found = index.reader.getClosest(key, index.value, true);
      if (found != null && found.equals(key)) {
        result = true;
      }
    } catch (EOFException ex) {
      // thrown if key is beyond all data in the map
    }
    index.cached = false;
    reheap.add(index);
  }
  heap = reheap;
  return result;
}
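The same seek-all-readers pattern can be written against plain Hadoop MapFile readers. Below is a standalone sketch, not the Accumulo class, using MapFile.Reader.getClosest with before set to true to position each reader at the closest key at or before the requested one.

import java.io.EOFException;
import java.io.IOException;
import java.util.List;
import org.apache.hadoop.io.MapFile;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.io.WritableComparable;

// Standalone sketch: seek every reader to the requested key and report whether
// any of them holds an exact match, mirroring the loop in MultiReader.seek.
public class SeekAllSketch {

  public static boolean seekAll(List<MapFile.Reader> readers,
                                WritableComparable<?> key, Writable value) throws IOException {
    boolean exactMatch = false;
    for (MapFile.Reader reader : readers) {
      try {
        WritableComparable<?> found = reader.getClosest(key, value, true);
        if (found != null && found.equals(key)) {
          exactMatch = true;
        }
      } catch (EOFException ex) {
        // The key is beyond all data in this reader; leave it positioned at the end.
      }
    }
    return exactMatch;
  }
}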
Use of org.apache.hadoop.io.WritableComparable in project Plume by tdunning.
The class MSCRCombiner, method reduce.
@SuppressWarnings("unchecked")
protected void reduce(final PlumeObject arg0, java.lang.Iterable<PlumeObject> values,
    Reducer<PlumeObject, PlumeObject, PlumeObject, PlumeObject>.Context context)
    throws IOException, InterruptedException {
  PCollection col = mscr.getChannelByNumber().get(arg0.sourceId);
  OutputChannel oC = mscr.getOutputChannels().get(col);
  if (oC.combiner != null) {
    // Apply the combiner function for this channel
    List<WritableComparable> vals = Lists.newArrayList();
    for (PlumeObject val : values) {
      vals.add(val.obj);
    }
    WritableComparable result = (WritableComparable) oC.combiner.getCombiner().combine(vals);
    context.write(arg0, new PlumeObject(result, arg0.sourceId));
  } else {
    // Direct writing - write all key, value pairs
    for (PlumeObject val : values) {
      context.write(arg0, val);
    }
  }
}
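The channel's combiner collapses the collected WritableComparable values into a single result before it is re-wrapped in a PlumeObject. Below is a standalone sketch of such a combine step as a plain summing function; this is not Plume's combiner interface.

import java.util.Arrays;
import java.util.List;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.WritableComparable;

// Standalone sketch of an associative combine step: fold a list of
// WritableComparable values (here IntWritables) into a single result.
public class SumCombineSketch {

  static IntWritable combine(List<? extends WritableComparable<?>> vals) {
    int sum = 0;
    for (WritableComparable<?> val : vals) {
      sum += ((IntWritable) val).get();
    }
    return new IntWritable(sum);
  }

  public static void main(String[] args) {
    List<IntWritable> vals = Arrays.asList(new IntWritable(1), new IntWritable(2), new IntWritable(3));
    System.out.println(combine(vals));   // prints 6
  }
}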