
Example 11 with Reporter

Use of org.apache.hadoop.mapred.Reporter in project trevni by cutting.

The class AvroTrevniOutputFormat, method getRecordWriter.

@Override
public RecordWriter<AvroWrapper<T>, NullWritable> getRecordWriter(FileSystem ignore, final JobConf job, final String name, Progressable prog) throws IOException {
    boolean isMapOnly = job.getNumReduceTasks() == 0;
    final Schema schema = isMapOnly ? AvroJob.getMapOutputSchema(job) : AvroJob.getOutputSchema(job);
    final ColumnFileMetaData meta = new ColumnFileMetaData();
    // copy job properties carrying the Trevni metadata prefix into the column-file metadata
    for (Map.Entry<String, String> e : job)
        if (e.getKey().startsWith(META_PREFIX))
            meta.put(e.getKey().substring(META_PREFIX.length()), e.getValue().getBytes(MetaData.UTF8));
    final Path dir = FileOutputFormat.getTaskOutputPath(job, name);
    final FileSystem fs = dir.getFileSystem(job);
    if (!fs.mkdirs(dir))
        throw new IOException("Failed to create directory: " + dir);
    final long blockSize = fs.getDefaultBlockSize();
    return new RecordWriter<AvroWrapper<T>, NullWritable>() {

        private int part = 0;

        private AvroColumnWriter<T> writer = new AvroColumnWriter<T>(schema, meta, ReflectData.get());

        private void flush() throws IOException {
            OutputStream out = fs.create(new Path(dir, "part-" + (part++) + EXT));
            try {
                writer.writeTo(out);
            } finally {
                out.close();
            }
            writer = new AvroColumnWriter<T>(schema, meta, ReflectData.get());
        }

        public void write(AvroWrapper<T> wrapper, NullWritable ignore) throws IOException {
            writer.write(wrapper.datum());
            if (writer.sizeEstimate() >= blockSize) // block full
                flush();
        }

        public void close(Reporter reporter) throws IOException {
            flush();
        }
    };
}
Also used : Path(org.apache.hadoop.fs.Path) Schema(org.apache.avro.Schema) OutputStream(java.io.OutputStream) Reporter(org.apache.hadoop.mapred.Reporter) IOException(java.io.IOException) NullWritable(org.apache.hadoop.io.NullWritable) RecordWriter(org.apache.hadoop.mapred.RecordWriter) ColumnFileMetaData(org.apache.trevni.ColumnFileMetaData) FileSystem(org.apache.hadoop.fs.FileSystem) AvroWrapper(org.apache.avro.mapred.AvroWrapper) Map(java.util.Map)
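
For context, a job that writes through this format is typically configured with the output schema and the Trevni output format before submission. A minimal sketch, assuming the org.apache.avro.mapred.AvroJob helper and a trevni-avro style package for AvroTrevniOutputFormat; the job name and output path are placeholders:

import org.apache.avro.Schema;
import org.apache.avro.mapred.AvroJob;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobConf;
// package assumed; adjust to wherever AvroTrevniOutputFormat lives in this project
import org.apache.trevni.avro.AvroTrevniOutputFormat;

public class TrevniJobSetup {
    public static JobConf configure(Schema outputSchema) {
        JobConf job = new JobConf(TrevniJobSetup.class);
        job.setJobName("write-trevni");                        // placeholder job name
        // getRecordWriter() above pulls this schema back out of the JobConf
        AvroJob.setOutputSchema(job, outputSchema);
        // override the Avro default with the Trevni column-file format
        job.setOutputFormat(AvroTrevniOutputFormat.class);
        FileOutputFormat.setOutputPath(job, new Path("/tmp/trevni-out")); // placeholder path
        return job;
    }
}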

Example 12 with Reporter

Use of org.apache.hadoop.mapred.Reporter in project hbase by apache.

The class TestGroupingTableMap, method shouldCreateNewKey.

@Test
@SuppressWarnings({ "deprecation" })
public void shouldCreateNewKey() throws Exception {
    GroupingTableMap gTableMap = null;
    try {
        Result result = mock(Result.class);
        Reporter reporter = mock(Reporter.class);
        final byte[] bSeparator = Bytes.toBytes(" ");
        gTableMap = new GroupingTableMap();
        Configuration cfg = new Configuration();
        cfg.set(GroupingTableMap.GROUP_COLUMNS, "familyA:qualifierA familyB:qualifierB");
        JobConf jobConf = new JobConf(cfg);
        gTableMap.configure(jobConf);
        final byte[] firstPartKeyValue = Bytes.toBytes("34879512738945");
        final byte[] secondPartKeyValue = Bytes.toBytes("35245142671437");
        byte[] row = {};
        List<Cell> cells = ImmutableList.<Cell>of(
                new KeyValue(row, "familyA".getBytes(), "qualifierA".getBytes(), firstPartKeyValue),
                new KeyValue(row, "familyB".getBytes(), "qualifierB".getBytes(), secondPartKeyValue));
        when(result.listCells()).thenReturn(cells);
        final AtomicBoolean outputCollected = new AtomicBoolean();
        OutputCollector<ImmutableBytesWritable, Result> outputCollector = new OutputCollector<ImmutableBytesWritable, Result>() {

            @Override
            public void collect(ImmutableBytesWritable arg, Result result) throws IOException {
                assertArrayEquals(com.google.common.primitives.Bytes.concat(firstPartKeyValue, bSeparator, secondPartKeyValue), arg.copyBytes());
                outputCollected.set(true);
            }
        };
        gTableMap.map(null, result, outputCollector, reporter);
        verify(result).listCells();
        Assert.assertTrue("Output not received", outputCollected.get());
        final byte[] firstPartValue = Bytes.toBytes("238947928");
        final byte[] secondPartValue = Bytes.toBytes("4678456942345");
        byte[][] data = { firstPartValue, secondPartValue };
        ImmutableBytesWritable byteWritable = gTableMap.createGroupKey(data);
        assertArrayEquals(com.google.common.primitives.Bytes.concat(firstPartValue, bSeparator, secondPartValue), byteWritable.get());
    } finally {
        if (gTableMap != null)
            gTableMap.close();
    }
}
Also used : OutputCollector(org.apache.hadoop.mapred.OutputCollector) KeyValue(org.apache.hadoop.hbase.KeyValue) ImmutableBytesWritable(org.apache.hadoop.hbase.io.ImmutableBytesWritable) Configuration(org.apache.hadoop.conf.Configuration) Reporter(org.apache.hadoop.mapred.Reporter) Result(org.apache.hadoop.hbase.client.Result) AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) JobConf(org.apache.hadoop.mapred.JobConf) Cell(org.apache.hadoop.hbase.Cell) Test(org.junit.Test)
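
Outside of a test, GroupingTableMap is normally wired into a job through its static initJob helper, which sets the same GROUP_COLUMNS property the test above sets by hand. A minimal sketch, assuming the org.apache.hadoop.hbase.mapred.GroupingTableMap.initJob signature; the table name is a placeholder:

import org.apache.hadoop.hbase.mapred.GroupingTableMap;
import org.apache.hadoop.mapred.JobConf;

public class GroupingJobSetup {
    public static void configure(JobConf job) {
        // Scan familyA:qualifierA and familyB:qualifierB from the placeholder table
        // "my_table", and build the map output key by concatenating those two column
        // values, which is what the GROUP_COLUMNS setting above configures directly.
        GroupingTableMap.initJob("my_table",
                "familyA:qualifierA familyB:qualifierB",   // columns to scan
                "familyA:qualifierA familyB:qualifierB",   // columns forming the group key
                GroupingTableMap.class, job);
    }
}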

Example 13 with Reporter

Use of org.apache.hadoop.mapred.Reporter in project hive by apache.

The class Hadoop23Shims, method getCombineFileInputFormat.

@Override
public HadoopShims.CombineFileInputFormatShim getCombineFileInputFormat() {
    return new CombineFileInputFormatShim() {

        @Override
        public RecordReader getRecordReader(InputSplit split, JobConf job, Reporter reporter) throws IOException {
            throw new IOException("CombineFileInputFormat.getRecordReader not needed.");
        }

        @Override
        protected List<FileStatus> listStatus(JobContext job) throws IOException {
            List<FileStatus> result = super.listStatus(job);
            Iterator<FileStatus> it = result.iterator();
            while (it.hasNext()) {
                FileStatus stat = it.next();
                if (!stat.isFile() || (stat.getLen() == 0 && !stat.getPath().toUri().getScheme().equals("nullscan"))) {
                    it.remove();
                }
            }
            return result;
        }
    };
}
Also used : FileStatus(org.apache.hadoop.fs.FileStatus) LocatedFileStatus(org.apache.hadoop.fs.LocatedFileStatus) HdfsLocatedFileStatus(org.apache.hadoop.hdfs.protocol.HdfsLocatedFileStatus) Reporter(org.apache.hadoop.mapred.Reporter) IOException(java.io.IOException) JobContext(org.apache.hadoop.mapreduce.JobContext) InputSplit(org.apache.hadoop.mapred.InputSplit) JobConf(org.apache.hadoop.mapred.JobConf)
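
The interesting piece of this shim is the listStatus filter: it keeps regular files and drops zero-length files unless they come from Hive's synthetic "nullscan" scheme. The same predicate in isolation might look like the sketch below; the class and method names are illustrative, and the scheme comparison is written null-safe:

import org.apache.hadoop.fs.FileStatus;

final class SplitFileFilter {
    // Mirrors the removal condition in listStatus() above, inverted to "keep".
    static boolean shouldKeep(FileStatus stat) {
        if (!stat.isFile()) {
            return false;                                    // directories etc. are dropped
        }
        boolean isNullScan = "nullscan".equals(stat.getPath().toUri().getScheme());
        return stat.getLen() > 0 || isNullScan;              // empty files survive only on nullscan
    }
}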

Example 14 with Reporter

Use of org.apache.hadoop.mapred.Reporter in project hive by apache.

The class Rot13OutputFormat, method getHiveRecordWriter.

@Override
public RecordWriter getHiveRecordWriter(JobConf jc, Path outPath, Class<? extends Writable> valueClass, boolean isCompressed, Properties tableProperties, Progressable progress) throws IOException {
    final RecordWriter result = super.getHiveRecordWriter(jc, outPath, valueClass, isCompressed, tableProperties, progress);
    final Reporter reporter = (Reporter) progress;
    reporter.setStatus("got here");
    System.out.println("Got a reporter " + reporter);
    return new RecordWriter() {

        @Override
        public void write(Writable w) throws IOException {
            if (w instanceof Text) {
                Text value = (Text) w;
                Rot13InputFormat.rot13(value.getBytes(), 0, value.getLength());
                result.write(w);
            } else if (w instanceof BytesWritable) {
                BytesWritable value = (BytesWritable) w;
                Rot13InputFormat.rot13(value.getBytes(), 0, value.getLength());
                result.write(w);
            } else {
                throw new IllegalArgumentException("need text or bytes writable " + " instead of " + w.getClass().getName());
            }
        }

        @Override
        public void close(boolean abort) throws IOException {
            result.close(abort);
        }
    };
}
Also used : RecordWriter(org.apache.hadoop.hive.ql.exec.FileSinkOperator.RecordWriter) Reporter(org.apache.hadoop.mapred.Reporter) BytesWritable(org.apache.hadoop.io.BytesWritable) Writable(org.apache.hadoop.io.Writable) LongWritable(org.apache.hadoop.io.LongWritable) Text(org.apache.hadoop.io.Text) BytesWritable(org.apache.hadoop.io.BytesWritable)
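
Rot13InputFormat.rot13 itself is not shown in this example. A minimal in-place sketch of such a routine, assuming plain ASCII letters (all other bytes pass through unchanged); the class name is illustrative:

public final class Rot13 {
    // Rotate ASCII letters by 13 positions in place over bytes[offset, offset + length).
    public static void rot13(byte[] bytes, int offset, int length) {
        for (int i = offset; i < offset + length; i++) {
            byte b = bytes[i];
            if (b >= 'a' && b <= 'z') {
                bytes[i] = (byte) ('a' + (b - 'a' + 13) % 26);
            } else if (b >= 'A' && b <= 'Z') {
                bytes[i] = (byte) ('A' + (b - 'A' + 13) % 26);
            }
        }
    }
}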

Example 15 with Reporter

Use of org.apache.hadoop.mapred.Reporter in project hive by apache.

The class DummyContextUDF, method evaluate.

public Object evaluate(DeferredObject[] arguments) throws HiveException {
    Reporter reporter = context.getReporter();
    Counters.Counter counter = reporter.getCounter("org.apache.hadoop.mapred.Task$Counter", "MAP_INPUT_RECORDS");
    result.set(counter.getValue());
    return result;
}
Also used : Reporter(org.apache.hadoop.mapred.Reporter) Counters(org.apache.hadoop.mapred.Counters)
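
The context and result fields used by evaluate are not part of this fragment; in a GenericUDF they are typically populated through configure(MapredContext) and initialize(). A minimal sketch with the same field names (everything else, including the class name, is illustrative):

import org.apache.hadoop.hive.ql.exec.MapredContext;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
import org.apache.hadoop.io.LongWritable;

public class ContextAwareUDF extends GenericUDF {

    private MapredContext context;                           // where the Reporter comes from
    private final LongWritable result = new LongWritable();

    @Override
    public void configure(MapredContext mapredContext) {
        // Hive calls this at map/reduce runtime when a MapredContext is available.
        this.context = mapredContext;
    }

    @Override
    public ObjectInspector initialize(ObjectInspector[] arguments) {
        // The UDF returns the long counter value read in evaluate().
        return PrimitiveObjectInspectorFactory.writableLongObjectInspector;
    }

    @Override
    public Object evaluate(DeferredObject[] arguments) throws HiveException {
        // Same logic as DummyContextUDF.evaluate above.
        result.set(context.getReporter()
                .getCounter("org.apache.hadoop.mapred.Task$Counter", "MAP_INPUT_RECORDS")
                .getValue());
        return result;
    }

    @Override
    public String getDisplayString(String[] children) {
        return "context_aware_udf()";
    }
}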

Aggregations

Reporter (org.apache.hadoop.mapred.Reporter): 23 usages
JobConf (org.apache.hadoop.mapred.JobConf): 13 usages
Test (org.junit.Test): 12 usages
FileSystem (org.apache.hadoop.fs.FileSystem): 6 usages
Result (org.apache.hadoop.hbase.client.Result): 5 usages
ImmutableBytesWritable (org.apache.hadoop.hbase.io.ImmutableBytesWritable): 5 usages
InputSplit (org.apache.hadoop.mapred.InputSplit): 5 usages
Path (org.apache.hadoop.fs.Path): 4 usages
CompressionCodec (org.apache.hadoop.io.compress.CompressionCodec): 4 usages
Counter (org.apache.hadoop.mapred.Counters.Counter): 4 usages
File (java.io.File): 3 usages
IOException (java.io.IOException): 3 usages
Configuration (org.apache.hadoop.conf.Configuration): 3 usages
LocalDirAllocator (org.apache.hadoop.fs.LocalDirAllocator): 3 usages
Cell (org.apache.hadoop.hbase.Cell): 3 usages
KeyValue (org.apache.hadoop.hbase.KeyValue): 3 usages
LongWritable (org.apache.hadoop.io.LongWritable): 3 usages
Text (org.apache.hadoop.io.Text): 3 usages
MapOutputFile (org.apache.hadoop.mapred.MapOutputFile): 3 usages
ShuffleConsumerPlugin (org.apache.hadoop.mapred.ShuffleConsumerPlugin): 3 usages