
Example 36 with BytesWritable

Use of org.apache.hadoop.io.BytesWritable in project hadoop by apache.

The class SequenceFileAsBinaryOutputFormat, method getRecordWriter:

@Override
public RecordWriter<BytesWritable, BytesWritable> getRecordWriter(FileSystem ignored, JobConf job, String name, Progressable progress) throws IOException {
    // get the path of the temporary output file 
    Path file = FileOutputFormat.getTaskOutputPath(job, name);
    FileSystem fs = file.getFileSystem(job);
    CompressionCodec codec = null;
    CompressionType compressionType = CompressionType.NONE;
    if (getCompressOutput(job)) {
        // find the kind of compression to do
        compressionType = getOutputCompressionType(job);
        // find the right codec
        Class<? extends CompressionCodec> codecClass = getOutputCompressorClass(job, DefaultCodec.class);
        codec = ReflectionUtils.newInstance(codecClass, job);
    }
    final SequenceFile.Writer out = SequenceFile.createWriter(fs, job, file, getSequenceFileOutputKeyClass(job), getSequenceFileOutputValueClass(job), compressionType, codec, progress);
    return new RecordWriter<BytesWritable, BytesWritable>() {

        private WritableValueBytes wvaluebytes = new WritableValueBytes();

        public void write(BytesWritable bkey, BytesWritable bvalue) throws IOException {
            wvaluebytes.reset(bvalue);
            out.appendRaw(bkey.getBytes(), 0, bkey.getLength(), wvaluebytes);
            wvaluebytes.reset(null);
        }

        public void close(Reporter reporter) throws IOException {
            out.close();
        }
    };
}
Also used: Path (org.apache.hadoop.fs.Path), SequenceFile (org.apache.hadoop.io.SequenceFile), FileSystem (org.apache.hadoop.fs.FileSystem), BytesWritable (org.apache.hadoop.io.BytesWritable), CompressionCodec (org.apache.hadoop.io.compress.CompressionCodec), CompressionType (org.apache.hadoop.io.SequenceFile.CompressionType)
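The writer above appends the key and value as pre-serialized bytes, while the job's declared key/value classes only describe the logical types recorded in the SequenceFile header. A minimal driver sketch of how such a job might be configured (the class name and output path below are hypothetical, not part of the Hadoop example):

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.SequenceFile.CompressionType;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.SequenceFileAsBinaryOutputFormat;
import org.apache.hadoop.mapred.SequenceFileOutputFormat;

public class BinarySeqFileJobSketch {

    // Configures a mapred job so that getRecordWriter() above receives raw
    // BytesWritable pairs but records Text/IntWritable as the declared types.
    public static JobConf configure(JobConf job) {
        job.setOutputFormat(SequenceFileAsBinaryOutputFormat.class);
        SequenceFileAsBinaryOutputFormat.setSequenceFileOutputKeyClass(job, Text.class);
        SequenceFileAsBinaryOutputFormat.setSequenceFileOutputValueClass(job, IntWritable.class);
        SequenceFileOutputFormat.setOutputCompressionType(job, CompressionType.BLOCK);
        FileOutputFormat.setOutputPath(job, new Path("/tmp/binary-seqfile-out"));
        return job;
    }
}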

Example 37 with BytesWritable

Use of org.apache.hadoop.io.BytesWritable in project hadoop by apache.

The class TestTFileSeek, method createTFile:

private void createTFile() throws IOException {
    long totalBytes = 0;
    FSDataOutputStream fout = createFSOutput(path, fs);
    try {
        Writer writer = new Writer(fout, options.minBlockSize, options.compress, "memcmp", conf);
        try {
            BytesWritable key = new BytesWritable();
            BytesWritable val = new BytesWritable();
            timer.start();
            for (long i = 0; true; ++i) {
                if (i % 1000 == 0) {
                    // check the output file size every 1000 rows.
                    if (fs.getFileStatus(path).getLen() >= options.fileSize) {
                        break;
                    }
                }
                kvGen.next(key, val, false);
                writer.append(key.getBytes(), 0, key.getLength(), val.getBytes(), 0, val.getLength());
                totalBytes += key.getLength();
                totalBytes += val.getLength();
            }
            timer.stop();
        } finally {
            writer.close();
        }
    } finally {
        fout.close();
    }
    // duration in microseconds
    double duration = (double) timer.read() / 1000;
    long fsize = fs.getFileStatus(path).getLen();
    System.out.printf("time: %s...uncompressed: %.2fMB...raw thrpt: %.2fMB/s\n", timer.toString(), (double) totalBytes / 1024 / 1024, totalBytes / duration);
    System.out.printf("time: %s...file size: %.2fMB...disk thrpt: %.2fMB/s\n", timer.toString(), (double) fsize / 1024 / 1024, fsize / duration);
}
Also used: BytesWritable (org.apache.hadoop.io.BytesWritable), FSDataOutputStream (org.apache.hadoop.fs.FSDataOutputStream), Writer (org.apache.hadoop.io.file.tfile.TFile.Writer)
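To complement the write path, here is a minimal read-back sketch (assuming the same path, fs, and conf the test uses; the class and method names are illustrative) that scans every key/value pair written by createTFile():

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.file.tfile.TFile;

public class TFileScanSketch {

    // Walks the TFile from the first entry to the end and counts the records.
    public static long countRecords(FileSystem fs, Path path, Configuration conf) throws IOException {
        long count = 0;
        try (FSDataInputStream in = fs.open(path)) {
            TFile.Reader reader = new TFile.Reader(in, fs.getFileStatus(path).getLen(), conf);
            TFile.Reader.Scanner scanner = reader.createScanner();
            BytesWritable key = new BytesWritable();
            BytesWritable val = new BytesWritable();
            while (!scanner.atEnd()) {
                // get() copies the current entry into the reusable writables.
                scanner.entry().get(key, val);
                count++;
                scanner.advance();
            }
            scanner.close();
            reader.close();
        }
        return count;
    }
}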

Example 38 with BytesWritable

Use of org.apache.hadoop.io.BytesWritable in project hadoop by apache.

The class TestTFileSeek, method seekTFile:

public void seekTFile() throws IOException {
    int miss = 0;
    long totalBytes = 0;
    FSDataInputStream fsdis = fs.open(path);
    Reader reader = new Reader(fsdis, fs.getFileStatus(path).getLen(), conf);
    KeySampler kSampler = new KeySampler(rng, reader.getFirstKey(), reader.getLastKey(), keyLenGen);
    Scanner scanner = reader.createScanner();
    BytesWritable key = new BytesWritable();
    BytesWritable val = new BytesWritable();
    timer.reset();
    timer.start();
    for (int i = 0; i < options.seekCount; ++i) {
        kSampler.next(key);
        scanner.lowerBound(key.getBytes(), 0, key.getLength());
        if (!scanner.atEnd()) {
            scanner.entry().get(key, val);
            totalBytes += key.getLength();
            totalBytes += val.getLength();
        } else {
            ++miss;
        }
    }
    timer.stop();
    // duration in microseconds
    double duration = (double) timer.read() / 1000;
    System.out.printf("time: %s...avg seek: %s...%d hit...%d miss...avg I/O size: %.2fKB\n", timer.toString(), NanoTimer.nanoTimeToString(timer.read() / options.seekCount), options.seekCount - miss, miss, (double) totalBytes / 1024 / (options.seekCount - miss));
}
Also used: Scanner (org.apache.hadoop.io.file.tfile.TFile.Reader.Scanner), FSDataInputStream (org.apache.hadoop.fs.FSDataInputStream), Reader (org.apache.hadoop.io.file.tfile.TFile.Reader), BytesWritable (org.apache.hadoop.io.BytesWritable)
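The seek loop above uses lowerBound(), which positions the scanner at the first entry whose key is greater than or equal to the probe. For exact-match lookups the scanner also offers seekTo(); a small sketch (class, method, and variable names are illustrative):

import java.io.IOException;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.file.tfile.TFile;

public class TFileLookupSketch {

    // Returns the value stored under the given key, or null if the key is absent.
    public static BytesWritable lookup(TFile.Reader reader, byte[] key) throws IOException {
        TFile.Reader.Scanner scanner = reader.createScanner();
        try {
            if (scanner.seekTo(key)) { // true only on an exact key match
                BytesWritable k = new BytesWritable();
                BytesWritable v = new BytesWritable();
                scanner.entry().get(k, v);
                return v;
            }
            return null;
        } finally {
            scanner.close();
        }
    }
}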

Example 39 with BytesWritable

Use of org.apache.hadoop.io.BytesWritable in project hive by apache.

The class DelimitedInputWriter, method encode:

@Override
public Object encode(byte[] record) throws SerializationError {
    try {
        BytesWritable blob = new BytesWritable();
        blob.set(record, 0, record.length);
        return serde.deserialize(blob);
    } catch (SerDeException e) {
        throw new SerializationError("Unable to convert byte[] record into Object", e);
    }
}
Also used: BytesWritable (org.apache.hadoop.io.BytesWritable), SerDeException (org.apache.hadoop.hive.serde2.SerDeException)
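The encode() method only wraps the raw record bytes so the SerDe can deserialize them: BytesWritable.set() copies the bytes, and getBytes() returns a backing buffer that may be longer than the logical content, so it must always be paired with getLength(). A small standalone sketch of that wrapping (the delimited sample record is hypothetical):

import java.nio.charset.StandardCharsets;
import java.util.Arrays;
import org.apache.hadoop.io.BytesWritable;

public class BytesWritableWrapSketch {

    public static void main(String[] args) {
        // Hypothetical ^A-delimited row, as DelimitedInputWriter would receive it.
        byte[] record = "a\u0001b\u0001c".getBytes(StandardCharsets.UTF_8);
        BytesWritable blob = new BytesWritable();
        blob.set(record, 0, record.length); // copies the bytes into the writable
        // Trim the backing buffer to the logical length before comparing.
        byte[] logical = Arrays.copyOf(blob.getBytes(), blob.getLength());
        System.out.println(Arrays.equals(record, logical)); // prints: true
    }
}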

Example 40 with BytesWritable

Use of org.apache.hadoop.io.BytesWritable in project druid by druid-io.

The class IndexGeneratorCombinerTest, method testMultipleRowsNotMerged:

@Test
public void testMultipleRowsNotMerged() throws Exception {
    long timestamp = System.currentTimeMillis();
    Bucket bucket = new Bucket(0, new DateTime(timestamp), 0);
    SortableBytes keySortableBytes = new SortableBytes(bucket.toGroupKey(), new byte[0]);
    BytesWritable key = keySortableBytes.toBytesWritable();
    InputRow row1 = new MapBasedInputRow(timestamp, ImmutableList.<String>of("host", "keywords"), ImmutableMap.<String, Object>of("host", "host1", "keywords", Arrays.asList("foo", "bar"), "visited", 10));
    InputRow row2 = new MapBasedInputRow(timestamp, ImmutableList.<String>of("host", "keywords"), ImmutableMap.<String, Object>of("host", "host2", "keywords", Arrays.asList("foo", "bar"), "visited", 5));
    List<BytesWritable> rows = Lists.newArrayList(new BytesWritable(InputRowSerde.toBytes(row1, aggregators, true)), new BytesWritable(InputRowSerde.toBytes(row2, aggregators, true)));
    Reducer.Context context = EasyMock.createNiceMock(Reducer.Context.class);
    Capture<BytesWritable> captureKey1 = Capture.newInstance();
    Capture<BytesWritable> captureVal1 = Capture.newInstance();
    Capture<BytesWritable> captureKey2 = Capture.newInstance();
    Capture<BytesWritable> captureVal2 = Capture.newInstance();
    context.write(EasyMock.capture(captureKey1), EasyMock.capture(captureVal1));
    context.write(EasyMock.capture(captureKey2), EasyMock.capture(captureVal2));
    EasyMock.replay(context);
    combiner.reduce(key, rows, context);
    EasyMock.verify(context);
    Assert.assertTrue(captureKey1.getValue() == key);
    Assert.assertTrue(captureKey2.getValue() == key);
    InputRow capturedRow1 = InputRowSerde.fromBytes(captureVal1.getValue().getBytes(), aggregators);
    Assert.assertEquals(Arrays.asList("host", "keywords"), capturedRow1.getDimensions());
    Assert.assertEquals(Arrays.asList("host1"), capturedRow1.getDimension("host"));
    Assert.assertEquals(Arrays.asList("bar", "foo"), capturedRow1.getDimension("keywords"));
    Assert.assertEquals(10, capturedRow1.getLongMetric("visited_sum"));
    Assert.assertEquals(1.0, (Double) HyperUniquesAggregatorFactory.estimateCardinality(capturedRow1.getRaw("unique_hosts")), 0.001);
    InputRow capturedRow2 = InputRowSerde.fromBytes(captureVal2.getValue().getBytes(), aggregators);
    Assert.assertEquals(Arrays.asList("host", "keywords"), capturedRow2.getDimensions());
    Assert.assertEquals(Arrays.asList("host2"), capturedRow2.getDimension("host"));
    Assert.assertEquals(Arrays.asList("bar", "foo"), capturedRow2.getDimension("keywords"));
    Assert.assertEquals(5, capturedRow2.getLongMetric("visited_sum"));
    Assert.assertEquals(1.0, (Double) HyperUniquesAggregatorFactory.estimateCardinality(capturedRow2.getRaw("unique_hosts")), 0.001);
}
Also used: MapBasedInputRow (io.druid.data.input.MapBasedInputRow), InputRow (io.druid.data.input.InputRow), BytesWritable (org.apache.hadoop.io.BytesWritable), Reducer (org.apache.hadoop.mapreduce.Reducer), DateTime (org.joda.time.DateTime), Test (org.junit.Test)
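The test passes serialized rows to the combiner as a list of BytesWritable values and asserts that both come back out unmerged. In a real MapReduce job the value objects handed to a reducer or combiner are reused across the iteration, so their bytes must be copied before being kept. A generic sketch of that caveat (a hypothetical pass-through reducer, not Druid's combiner):

import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.mapreduce.Reducer;

public class PassThroughReducerSketch
        extends Reducer<BytesWritable, BytesWritable, BytesWritable, BytesWritable> {

    @Override
    protected void reduce(BytesWritable key, Iterable<BytesWritable> values, Context context)
            throws IOException, InterruptedException {
        List<byte[]> copies = new ArrayList<>();
        for (BytesWritable value : values) {
            // Detach the payload from the reused writable before storing it.
            copies.add(Arrays.copyOf(value.getBytes(), value.getLength()));
        }
        for (byte[] payload : copies) {
            context.write(key, new BytesWritable(payload)); // emit each row unmerged
        }
    }
}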

Aggregations (classes used together with BytesWritable in the indexed examples, with occurrence counts):

BytesWritable (org.apache.hadoop.io.BytesWritable): 339
Test (org.junit.Test): 92
Text (org.apache.hadoop.io.Text): 81
LongWritable (org.apache.hadoop.io.LongWritable): 66
IntWritable (org.apache.hadoop.io.IntWritable): 54
ObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector): 51
ArrayList (java.util.ArrayList): 48
List (java.util.List): 48
Path (org.apache.hadoop.fs.Path): 47
IOException (java.io.IOException): 42
Configuration (org.apache.hadoop.conf.Configuration): 41
FloatWritable (org.apache.hadoop.io.FloatWritable): 37
Writable (org.apache.hadoop.io.Writable): 36
BooleanWritable (org.apache.hadoop.io.BooleanWritable): 35
FileSystem (org.apache.hadoop.fs.FileSystem): 28
SequenceFile (org.apache.hadoop.io.SequenceFile): 27
DoubleWritable (org.apache.hadoop.hive.serde2.io.DoubleWritable): 26
ShortWritable (org.apache.hadoop.hive.serde2.io.ShortWritable): 26
HiveDecimalWritable (org.apache.hadoop.hive.serde2.io.HiveDecimalWritable): 25
Random (java.util.Random): 24