Use of org.apache.hadoop.io.BytesWritable in project hadoop by apache.
The class SequenceFileAsBinaryOutputFormat, method getRecordWriter.
@Override
public RecordWriter<BytesWritable, BytesWritable> getRecordWriter(FileSystem ignored, JobConf job, String name, Progressable progress) throws IOException {
  // get the path of the temporary output file
  Path file = FileOutputFormat.getTaskOutputPath(job, name);
  FileSystem fs = file.getFileSystem(job);
  CompressionCodec codec = null;
  CompressionType compressionType = CompressionType.NONE;
  if (getCompressOutput(job)) {
    // find the kind of compression to do
    compressionType = getOutputCompressionType(job);
    // find the right codec
    Class<? extends CompressionCodec> codecClass = getOutputCompressorClass(job, DefaultCodec.class);
    codec = ReflectionUtils.newInstance(codecClass, job);
  }
  final SequenceFile.Writer out = SequenceFile.createWriter(fs, job, file,
      getSequenceFileOutputKeyClass(job), getSequenceFileOutputValueClass(job),
      compressionType, codec, progress);
  return new RecordWriter<BytesWritable, BytesWritable>() {

    private WritableValueBytes wvaluebytes = new WritableValueBytes();

    public void write(BytesWritable bkey, BytesWritable bvalue) throws IOException {
      wvaluebytes.reset(bvalue);
      out.appendRaw(bkey.getBytes(), 0, bkey.getLength(), wvaluebytes);
      wvaluebytes.reset(null);
    }

    public void close(Reporter reporter) throws IOException {
      out.close();
    }
  };
}
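Here appendRaw() writes the key bytes straight into the sequence file without re-serializing them, and WritableValueBytes wraps the value BytesWritable so its bytes can be handed over the same way. The offset/length arguments matter because a BytesWritable's backing buffer can be larger than its logical content. A small standalone sketch of those accessors (the class name is just for illustration):

import java.nio.charset.StandardCharsets;
import org.apache.hadoop.io.BytesWritable;

public class BytesWritableBufferDemo {
  public static void main(String[] args) {
    byte[] payload = "hello".getBytes(StandardCharsets.UTF_8);

    BytesWritable bw = new BytesWritable();
    bw.set(payload, 0, payload.length);   // copies the payload into an internal, possibly larger buffer

    // getBytes() exposes the whole backing buffer; getLength() bounds the valid content,
    // which is why the writer above passes (getBytes(), 0, getLength()) to appendRaw().
    System.out.println(bw.getLength());                          // 5
    System.out.println(bw.getBytes().length >= bw.getLength());  // true

    // copyBytes() returns an exact-length copy when a trimmed array is needed.
    byte[] exact = bw.copyBytes();
    System.out.println(exact.length);                            // 5
  }
}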
Use of org.apache.hadoop.io.BytesWritable in project hadoop by apache.
The class TestTFileSeek, method createTFile.
private void createTFile() throws IOException {
  long totalBytes = 0;
  FSDataOutputStream fout = createFSOutput(path, fs);
  try {
    Writer writer = new Writer(fout, options.minBlockSize, options.compress, "memcmp", conf);
    try {
      BytesWritable key = new BytesWritable();
      BytesWritable val = new BytesWritable();
      timer.start();
      for (long i = 0; true; ++i) {
        if (i % 1000 == 0) {
          // check the file size every 1000 rows.
          if (fs.getFileStatus(path).getLen() >= options.fileSize) {
            break;
          }
        }
        kvGen.next(key, val, false);
        writer.append(key.getBytes(), 0, key.getLength(), val.getBytes(), 0, val.getLength());
        totalBytes += key.getLength();
        totalBytes += val.getLength();
      }
      timer.stop();
    } finally {
      writer.close();
    }
  } finally {
    fout.close();
  }
  // duration in microseconds (timer.read() is in nanoseconds).
  double duration = (double) timer.read() / 1000;
  long fsize = fs.getFileStatus(path).getLen();
  System.out.printf("time: %s...uncompressed: %.2fMB...raw thrpt: %.2fMB/s\n",
      timer.toString(), (double) totalBytes / 1024 / 1024, totalBytes / duration);
  System.out.printf("time: %s...file size: %.2fMB...disk thrpt: %.2fMB/s\n",
      timer.toString(), (double) fsize / 1024 / 1024, fsize / duration);
}
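The rows written above can be read back with the same reusable BytesWritable pair. A minimal sequential read-back sketch, assuming the test's fs, path, and conf fields and the TFile Reader/Scanner API that seekTFile() below also uses (fragment of a method that throws IOException):

// Hedged sketch: count the rows back out of the file written by createTFile().
FSDataInputStream in = fs.open(path);
Reader reader = new Reader(in, fs.getFileStatus(path).getLen(), conf);
Scanner scanner = reader.createScanner();
BytesWritable key = new BytesWritable();
BytesWritable val = new BytesWritable();
long rows = 0;
while (!scanner.atEnd()) {
  scanner.entry().get(key, val);   // copies the current key/value into the reusable writables
  ++rows;
  scanner.advance();
}
scanner.close();
reader.close();
in.close();
System.out.println("rows read back: " + rows);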
Use of org.apache.hadoop.io.BytesWritable in project hadoop by apache.
The class TestTFileSeek, method seekTFile.
public void seekTFile() throws IOException {
  int miss = 0;
  long totalBytes = 0;
  FSDataInputStream fsdis = fs.open(path);
  Reader reader = new Reader(fsdis, fs.getFileStatus(path).getLen(), conf);
  KeySampler kSampler = new KeySampler(rng, reader.getFirstKey(), reader.getLastKey(), keyLenGen);
  Scanner scanner = reader.createScanner();
  BytesWritable key = new BytesWritable();
  BytesWritable val = new BytesWritable();
  timer.reset();
  timer.start();
  for (int i = 0; i < options.seekCount; ++i) {
    kSampler.next(key);
    scanner.lowerBound(key.getBytes(), 0, key.getLength());
    if (!scanner.atEnd()) {
      scanner.entry().get(key, val);
      totalBytes += key.getLength();
      totalBytes += val.getLength();
    } else {
      ++miss;
    }
  }
  timer.stop();
  // duration in microseconds (timer.read() is in nanoseconds).
  double duration = (double) timer.read() / 1000;
  System.out.printf("time: %s...avg seek: %s...%d hit...%d miss...avg I/O size: %.2fKB\n",
      timer.toString(), NanoTimer.nanoTimeToString(timer.read() / options.seekCount),
      options.seekCount - miss, miss, (double) totalBytes / 1024 / (options.seekCount - miss));
}
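lowerBound() moves the cursor to the first entry whose key is greater than or equal to the probe, so a miss only happens when the probe lands beyond the last key. The Scanner also exposes seekTo(), which does the same positioning but additionally reports whether an exact match was found. A hedged sketch of that variant, reusing the reader, kSampler, options, key, and val from above:

// Sketch only: exact-match variant of the seek loop above.
Scanner exact = reader.createScanner();
int found = 0;
for (int i = 0; i < options.seekCount; ++i) {
  kSampler.next(key);
  // seekTo() returns true only when the probed key actually exists in the file.
  if (exact.seekTo(key.getBytes(), 0, key.getLength())) {
    exact.entry().get(key, val);
    ++found;
  }
}
exact.close();
System.out.println("exact hits: " + found);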
Use of org.apache.hadoop.io.BytesWritable in project hive by apache.
The class DelimitedInputWriter, method encode.
@Override
public Object encode(byte[] record) throws SerializationError {
  try {
    BytesWritable blob = new BytesWritable();
    blob.set(record, 0, record.length);
    return serde.deserialize(blob);
  } catch (SerDeException e) {
    throw new SerializationError("Unable to convert byte[] record into Object", e);
  }
}
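The method wraps the raw delimited bytes in a BytesWritable so the writer's configured SerDe can deserialize them into a Hive row object. A minimal standalone sketch of the same pattern, assuming LazySimpleSerDe as the delimited SerDe; the column names, types, and delimiter below are illustrative, not taken from DelimitedInputWriter:

// Illustrative only: mirrors the encode() pattern above with LazySimpleSerDe.
// (initialize and deserialize throw SerDeException, which is left unhandled here.)
Properties props = new Properties();
props.setProperty(serdeConstants.LIST_COLUMNS, "id,name");
props.setProperty(serdeConstants.LIST_COLUMN_TYPES, "int:string");
props.setProperty(serdeConstants.FIELD_DELIM, ",");

LazySimpleSerDe serde = new LazySimpleSerDe();
serde.initialize(new Configuration(), props);

byte[] record = "42,alice".getBytes(StandardCharsets.UTF_8);
BytesWritable blob = new BytesWritable();
blob.set(record, 0, record.length);

Object row = serde.deserialize(blob);   // lazily parsed row backed by the blob's bytes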
Use of org.apache.hadoop.io.BytesWritable in project druid by druid-io.
The class IndexGeneratorCombinerTest, method testMultipleRowsNotMerged.
@Test
public void testMultipleRowsNotMerged() throws Exception {
  long timestamp = System.currentTimeMillis();
  Bucket bucket = new Bucket(0, new DateTime(timestamp), 0);
  SortableBytes keySortableBytes = new SortableBytes(bucket.toGroupKey(), new byte[0]);
  BytesWritable key = keySortableBytes.toBytesWritable();
  InputRow row1 = new MapBasedInputRow(
      timestamp,
      ImmutableList.<String>of("host", "keywords"),
      ImmutableMap.<String, Object>of("host", "host1", "keywords", Arrays.asList("foo", "bar"), "visited", 10)
  );
  InputRow row2 = new MapBasedInputRow(
      timestamp,
      ImmutableList.<String>of("host", "keywords"),
      ImmutableMap.<String, Object>of("host", "host2", "keywords", Arrays.asList("foo", "bar"), "visited", 5)
  );
  List<BytesWritable> rows = Lists.newArrayList(
      new BytesWritable(InputRowSerde.toBytes(row1, aggregators, true)),
      new BytesWritable(InputRowSerde.toBytes(row2, aggregators, true))
  );
  Reducer.Context context = EasyMock.createNiceMock(Reducer.Context.class);
  Capture<BytesWritable> captureKey1 = Capture.newInstance();
  Capture<BytesWritable> captureVal1 = Capture.newInstance();
  Capture<BytesWritable> captureKey2 = Capture.newInstance();
  Capture<BytesWritable> captureVal2 = Capture.newInstance();
  context.write(EasyMock.capture(captureKey1), EasyMock.capture(captureVal1));
  context.write(EasyMock.capture(captureKey2), EasyMock.capture(captureVal2));
  EasyMock.replay(context);
  combiner.reduce(key, rows, context);
  EasyMock.verify(context);
  Assert.assertTrue(captureKey1.getValue() == key);
  Assert.assertTrue(captureKey2.getValue() == key);
  InputRow capturedRow1 = InputRowSerde.fromBytes(captureVal1.getValue().getBytes(), aggregators);
  Assert.assertEquals(Arrays.asList("host", "keywords"), capturedRow1.getDimensions());
  Assert.assertEquals(Arrays.asList("host1"), capturedRow1.getDimension("host"));
  Assert.assertEquals(Arrays.asList("bar", "foo"), capturedRow1.getDimension("keywords"));
  Assert.assertEquals(10, capturedRow1.getLongMetric("visited_sum"));
  Assert.assertEquals(1.0,
      (Double) HyperUniquesAggregatorFactory.estimateCardinality(capturedRow1.getRaw("unique_hosts")), 0.001);
  InputRow capturedRow2 = InputRowSerde.fromBytes(captureVal2.getValue().getBytes(), aggregators);
  Assert.assertEquals(Arrays.asList("host", "keywords"), capturedRow2.getDimensions());
  Assert.assertEquals(Arrays.asList("host2"), capturedRow2.getDimension("host"));
  Assert.assertEquals(Arrays.asList("bar", "foo"), capturedRow2.getDimension("keywords"));
  Assert.assertEquals(5, capturedRow2.getLongMetric("visited_sum"));
  Assert.assertEquals(1.0,
      (Double) HyperUniquesAggregatorFactory.estimateCardinality(capturedRow2.getRaw("unique_hosts")), 0.001);
}
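The two rows cannot be rolled up (different hosts), so the combiner must forward each serialized row under the same BytesWritable key, which is exactly what the two captured writes verify. For reference, a minimal pass-through combiner in the Hadoop mapreduce API looks like the sketch below; it only illustrates that forwarding contract and is not Druid's IndexGeneratorCombiner, which first attempts to merge rows.

import java.io.IOException;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.mapreduce.Reducer;

// Illustrative pass-through combiner: every value is re-emitted unchanged
// under its incoming key, so unmergeable rows reach the reducer one by one.
public class PassThroughCombiner
    extends Reducer<BytesWritable, BytesWritable, BytesWritable, BytesWritable> {

  @Override
  protected void reduce(BytesWritable key, Iterable<BytesWritable> values, Context context)
      throws IOException, InterruptedException {
    for (BytesWritable value : values) {
      context.write(key, value);
    }
  }
}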