Example 16 with RecordWriter

Use of org.apache.hadoop.mapreduce.RecordWriter in project cdap by caskdata.

From class MultiWriter, method close():

@Override
public void close(TaskAttemptContext context) throws IOException, InterruptedException {
    try {
        // Snapshot the writer map so the bulk close is isolated from any
        // concurrent changes to this.recordWriters.
        Map<PartitionKey, RecordWriter<?, ?>> recordWriters = new HashMap<>();
        recordWriters.putAll(this.recordWriters);
        MultipleOutputs.closeRecordWriters(recordWriters, contexts);
        taskContext.flushOperations();
    } catch (Exception e) {
        throw new IOException(e);
    } finally {
        // Destroy the partitioner even if closing a writer or flushing failed.
        dynamicPartitioner.destroy();
    }
}
Also used: RecordWriter (org.apache.hadoop.mapreduce.RecordWriter), HashMap (java.util.HashMap), PartitionKey (co.cask.cdap.api.dataset.lib.PartitionKey), IOException (java.io.IOException)
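
For orientation, here is a minimal sketch (an assumption, not CDAP's actual helper) of what a bulk close such as MultipleOutputs.closeRecordWriters amounts to: each partition's writer is closed against a per-partition context, with `contexts` assumed to be a Map keyed by the same PartitionKeys.

// Hedged sketch only; not taken from CDAP. Assumes contexts is a
// Map<PartitionKey, TaskAttemptContext> parallel to recordWriters.
for (Map.Entry<PartitionKey, RecordWriter<?, ?>> entry : recordWriters.entrySet()) {
    TaskAttemptContext partitionContext = contexts.get(entry.getKey());
    entry.getValue().close(partitionContext);
}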

Example 17 with RecordWriter

Use of org.apache.hadoop.mapreduce.RecordWriter in project hive by apache.

From class RCFileMapReduceOutputFormat, method getRecordWriter():

/* (non-Javadoc)
  * @see org.apache.hadoop.mapreduce.lib.output.FileOutputFormat#getRecordWriter(org.apache.hadoop.mapreduce.TaskAttemptContext)
  */
@Override
public org.apache.hadoop.mapreduce.RecordWriter<WritableComparable<?>, BytesRefArrayWritable> getRecordWriter(TaskAttemptContext task) throws IOException, InterruptedException {
    // FileOutputFormat.getWorkOutputPath takes a TaskInputOutputContext rather than
    // a TaskAttemptContext, so it cannot be used here.
    FileOutputCommitter committer = (FileOutputCommitter) getOutputCommitter(task);
    Path outputPath = committer.getWorkPath();
    FileSystem fs = outputPath.getFileSystem(task.getConfiguration());
    if (!fs.exists(outputPath)) {
        fs.mkdirs(outputPath);
    }
    Path file = getDefaultWorkFile(task, "");
    CompressionCodec codec = null;
    if (getCompressOutput(task)) {
        Class<?> codecClass = getOutputCompressorClass(task, DefaultCodec.class);
        codec = (CompressionCodec) ReflectionUtils.newInstance(codecClass, task.getConfiguration());
    }
    final RCFile.Writer out = new RCFile.Writer(fs, task.getConfiguration(), file, task, codec);
    return new RecordWriter<WritableComparable<?>, BytesRefArrayWritable>() {

        /* (non-Javadoc)
      * @see org.apache.hadoop.mapreduce.RecordWriter#write(java.lang.Object, java.lang.Object)
      */
        @Override
        public void write(WritableComparable<?> key, BytesRefArrayWritable value) throws IOException {
            // RCFile stores only the columnar row value; the key is ignored.
            out.append(value);
        }

        /* (non-Javadoc)
      * @see org.apache.hadoop.mapreduce.RecordWriter#close(org.apache.hadoop.mapreduce.TaskAttemptContext)
      */
        @Override
        public void close(TaskAttemptContext task) throws IOException, InterruptedException {
            out.close();
        }
    };
}
Also used: Path (org.apache.hadoop.fs.Path), RCFile (org.apache.hadoop.hive.ql.io.RCFile), RecordWriter (org.apache.hadoop.mapreduce.RecordWriter), BytesRefArrayWritable (org.apache.hadoop.hive.serde2.columnar.BytesRefArrayWritable), WritableComparable (org.apache.hadoop.io.WritableComparable), FileOutputCommitter (org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter), FileSystem (org.apache.hadoop.fs.FileSystem), TaskAttemptContext (org.apache.hadoop.mapreduce.TaskAttemptContext), CompressionCodec (org.apache.hadoop.io.compress.CompressionCodec)
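
A quick way to exercise this output format outside a full MapReduce job is to build a TaskAttemptContext by hand. The sketch below is an assumption, not Hive test code: the output path, task IDs, and two-column row are illustrative, and it presumes the static RCFileMapReduceOutputFormat.setColumnNumber helper for declaring the column count.

// Sketch under assumptions: local Configuration, illustrative path and IDs.
Configuration conf = new Configuration();
Job job = Job.getInstance(conf);
RCFileMapReduceOutputFormat.setColumnNumber(job.getConfiguration(), 2); // two columns per row
FileOutputFormat.setOutputPath(job, new Path("/tmp/rcfile-demo")); // hypothetical path
TaskAttemptContext ctx = new TaskAttemptContextImpl(job.getConfiguration(),
    new TaskAttemptID("demo", 1, TaskType.MAP, 0, 0));
RCFileMapReduceOutputFormat format = new RCFileMapReduceOutputFormat();
RecordWriter<WritableComparable<?>, BytesRefArrayWritable> writer = format.getRecordWriter(ctx);
BytesRefArrayWritable row = new BytesRefArrayWritable(2);
row.set(0, new BytesRefWritable("col0".getBytes()));
row.set(1, new BytesRefWritable("col1".getBytes()));
writer.write(null, row); // the key is ignored, as shown above
writer.close(ctx);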

Example 18 with RecordWriter

Use of org.apache.hadoop.mapreduce.RecordWriter in project goldenorb by jzachr.

From class OrbPartition, method dumpData():

private void dumpData() {
    Configuration conf = new Configuration();
    Job job = null;
    JobContext jobContext = null;
    TaskAttemptContext tao = null;
    RecordWriter rw;
    VertexWriter vw;
    FileOutputFormat outputFormat;
    boolean tryAgain = true;
    int count = 0;
    // Retry the dump up to 15 times; any failure (or a committer that declines
    // the task commit) schedules another attempt.
    while (tryAgain && count < 15) {
        try {
            count++;
            tryAgain = false;
            if (job == null) {
                job = new Job(conf);
                job.setOutputFormatClass(TextOutputFormat.class);
                FileOutputFormat.setOutputPath(job, new Path(getOrbConf().getNameNode() + getOrbConf().getFileOutputPath()));
            }
            if (jobContext == null) {
                jobContext = new JobContext(job.getConfiguration(), new JobID());
            }
            System.out.println(jobContext.getConfiguration().get("mapred.output.dir"));
            tao = new TaskAttemptContext(jobContext.getConfiguration(), new TaskAttemptID(new TaskID(jobContext.getJobID(), true, getPartitionID()), 0));
            outputFormat = (FileOutputFormat) tao.getOutputFormatClass().newInstance();
            rw = outputFormat.getRecordWriter(tao);
            vw = (VertexWriter) getOrbConf().getVertexOutputFormatClass().newInstance();
            // Serialize every vertex in this partition through the VertexWriter.
            for (Vertex v : vertices.values()) {
                OrbContext oc = vw.vertexWrite(v);
                rw.write(oc.getKey(), oc.getValue());
            }
            rw.close(tao);
            FileOutputCommitter cm = (FileOutputCommitter) outputFormat.getOutputCommitter(tao);
            if (cm.needsTaskCommit(tao)) {
                cm.commitTask(tao);
                cm.cleanupJob(jobContext);
            } else {
                cm.cleanupJob(jobContext);
                tryAgain = true;
            }
        } catch (IOException | InstantiationException | IllegalAccessException
                | ClassNotFoundException | InterruptedException e) {
            tryAgain = true;
            e.printStackTrace();
        }
    }
    if (tryAgain) {
        // All attempts failed; pause briefly before returning.
        synchronized (this) {
            try {
                wait(1000);
            } catch (InterruptedException e) {
                e.printStackTrace();
            }
        }
    }
}
Also used: FileOutputFormat (org.apache.hadoop.mapreduce.lib.output.FileOutputFormat), Path (org.apache.hadoop.fs.Path), TaskID (org.apache.hadoop.mapreduce.TaskID), Configuration (org.apache.hadoop.conf.Configuration), OrbConfiguration (org.goldenorb.conf.OrbConfiguration), TaskAttemptID (org.apache.hadoop.mapreduce.TaskAttemptID), FileOutputCommitter (org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter), TaskAttemptContext (org.apache.hadoop.mapreduce.TaskAttemptContext), IOException (java.io.IOException), RecordWriter (org.apache.hadoop.mapreduce.RecordWriter), TextOutputFormat (org.apache.hadoop.mapreduce.lib.output.TextOutputFormat), OrbContext (org.goldenorb.io.output.OrbContext), VertexWriter (org.goldenorb.io.output.VertexWriter), JobContext (org.apache.hadoop.mapreduce.JobContext), Job (org.apache.hadoop.mapreduce.Job), JobID (org.apache.hadoop.mapreduce.JobID)
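
Note that this snippet is written against the old Hadoop API, in which JobContext and TaskAttemptContext were concrete classes. Under Hadoop 2.x they are interfaces, so the equivalent construction (a sketch; the "orb" job identifier is illustrative) goes through the Impl classes in org.apache.hadoop.mapreduce.task:

// Hadoop 2.x sketch: TaskID now takes a TaskType rather than a boolean.
JobContext jobContext = new JobContextImpl(job.getConfiguration(), new JobID("orb", 1));
TaskAttemptID attemptId = new TaskAttemptID(
    new TaskID(jobContext.getJobID(), TaskType.MAP, getPartitionID()), 0);
TaskAttemptContext tao = new TaskAttemptContextImpl(jobContext.getConfiguration(), attemptId);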

Example 19 with RecordWriter

Use of org.apache.hadoop.mapreduce.RecordWriter in project hadoop by apache.

From class TestRecovery, method writeBadOutput():

private void writeBadOutput(TaskAttempt attempt, Configuration conf) throws Exception {
    TaskAttemptContext tContext = new TaskAttemptContextImpl(conf, TypeConverter.fromYarn(attempt.getID()));
    TextOutputFormat<?, ?> theOutputFormat = new TextOutputFormat();
    RecordWriter theRecordWriter = theOutputFormat.getRecordWriter(tContext);
    NullWritable nullWritable = NullWritable.get();
    try {
        // Write records out of order, mixing null keys/values with NullWritable,
        // so the recovery test can recognize this attempt's output as bad.
        theRecordWriter.write(key2, val2);
        theRecordWriter.write(null, nullWritable);
        theRecordWriter.write(null, val2);
        theRecordWriter.write(nullWritable, val1);
        theRecordWriter.write(key1, nullWritable);
        theRecordWriter.write(key2, null);
        theRecordWriter.write(null, null);
        theRecordWriter.write(key1, val1);
    } finally {
        theRecordWriter.close(tContext);
    }
    OutputFormat outputFormat = ReflectionUtils.newInstance(tContext.getOutputFormatClass(), conf);
    OutputCommitter committer = outputFormat.getOutputCommitter(tContext);
    committer.commitTask(tContext);
}
Also used: OutputCommitter (org.apache.hadoop.mapreduce.OutputCommitter), RecordWriter (org.apache.hadoop.mapreduce.RecordWriter), TextOutputFormat (org.apache.hadoop.mapreduce.lib.output.TextOutputFormat), TaskAttemptContextImpl (org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl), OutputFormat (org.apache.hadoop.mapreduce.OutputFormat), FileOutputFormat (org.apache.hadoop.mapreduce.lib.output.FileOutputFormat), TaskAttemptContext (org.apache.hadoop.mapreduce.TaskAttemptContext), NullWritable (org.apache.hadoop.io.NullWritable)
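
The key1/key2/val1/val2 fields come from elsewhere in TestRecovery; assuming (as the rest of the test suggests) that they are Text values, minimal stand-ins would be:

// Stand-ins for the test's fields (assumption: Text, matching the rest of the file).
Text key1 = new Text("key1");
Text key2 = new Text("key2");
Text val1 = new Text("val1");
Text val2 = new Text("val2");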

Example 20 with RecordWriter

Use of org.apache.hadoop.mapreduce.RecordWriter in project hadoop by apache.

From class Chain, method runMapper():

@SuppressWarnings("unchecked")
void runMapper(TaskInputOutputContext context, int index) throws IOException, InterruptedException {
    Mapper mapper = mappers.get(index);
    // Adapt the surrounding task context into the RecordReader/RecordWriter
    // pair that this link of the chain runs against.
    RecordReader rr = new ChainRecordReader(context);
    RecordWriter rw = new ChainRecordWriter(context);
    Mapper.Context mapperContext = createMapContext(rr, rw, context, getConf(index));
    mapper.run(mapperContext);
    rr.close();
    rw.close(context);
}
Also used: Mapper (org.apache.hadoop.mapreduce.Mapper), WrappedMapper (org.apache.hadoop.mapreduce.lib.map.WrappedMapper), RecordWriter (org.apache.hadoop.mapreduce.RecordWriter), RecordReader (org.apache.hadoop.mapreduce.RecordReader)
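
Chain.runMapper is internal plumbing; user code reaches it through ChainMapper and ChainReducer. A hedged usage sketch, in which AMapper and BMapper are hypothetical Mapper subclasses, might look like:

// Two chained map stages in one task; each stage's output types must match
// the next stage's input types. AMapper and BMapper are hypothetical.
Job job = Job.getInstance(new Configuration());
ChainMapper.addMapper(job, AMapper.class, LongWritable.class, Text.class,
    Text.class, IntWritable.class, new Configuration(false));
ChainMapper.addMapper(job, BMapper.class, Text.class, IntWritable.class,
    Text.class, IntWritable.class, new Configuration(false));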

Aggregations

RecordWriter (org.apache.hadoop.mapreduce.RecordWriter): 26 uses
TaskAttemptContext (org.apache.hadoop.mapreduce.TaskAttemptContext): 19 uses
Configuration (org.apache.hadoop.conf.Configuration): 16 uses
TaskAttemptContextImpl (org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl): 12 uses
Job (org.apache.hadoop.mapreduce.Job): 11 uses
JobContext (org.apache.hadoop.mapreduce.JobContext): 11 uses
IOException (java.io.IOException): 10 uses
Path (org.apache.hadoop.fs.Path): 10 uses
JobContextImpl (org.apache.hadoop.mapreduce.task.JobContextImpl): 10 uses
MapFile (org.apache.hadoop.io.MapFile): 9 uses
File (java.io.File): 8 uses
FileSystem (org.apache.hadoop.fs.FileSystem): 4 uses
NullWritable (org.apache.hadoop.io.NullWritable): 4 uses
OutputCommitter (org.apache.hadoop.mapreduce.OutputCommitter): 4 uses
FileOutputCommitter (org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter): 4 uses
WritableComparable (org.apache.hadoop.io.WritableComparable): 3 uses
OutputFormat (org.apache.hadoop.mapreduce.OutputFormat): 3 uses
FileOutputFormat (org.apache.hadoop.mapreduce.lib.output.FileOutputFormat): 3 uses
TextOutputFormat (org.apache.hadoop.mapreduce.lib.output.TextOutputFormat): 3 uses
FileNotFoundException (java.io.FileNotFoundException): 2 uses