Example 16 with NullWritable

use of org.apache.hadoop.io.NullWritable in project hadoop by apache.

the class TestRecovery method writeOutput.

private void writeOutput(TaskAttempt attempt, Configuration conf) throws Exception {
    TaskAttemptContext tContext = new TaskAttemptContextImpl(conf, TypeConverter.fromYarn(attempt.getID()));
    // Raw types are deliberate: the writes below mix null, NullWritable and Text arguments.
    TextOutputFormat<?, ?> theOutputFormat = new TextOutputFormat();
    RecordWriter theRecordWriter = theOutputFormat.getRecordWriter(tContext);
    NullWritable nullWritable = NullWritable.get();
    try {
        // key1/key2/val1/val2 are Text fields defined elsewhere in TestRecovery.
        // TextOutputFormat skips a key or value that is null or NullWritable,
        // so these calls cover every combination.
        theRecordWriter.write(key1, val1);
        theRecordWriter.write(null, nullWritable);
        theRecordWriter.write(null, val1);
        theRecordWriter.write(nullWritable, val2);
        theRecordWriter.write(key2, nullWritable);
        theRecordWriter.write(key1, null);
        theRecordWriter.write(null, null);
        theRecordWriter.write(key2, val2);
    } finally {
        theRecordWriter.close(tContext);
    }
    OutputFormat outputFormat = ReflectionUtils.newInstance(tContext.getOutputFormatClass(), conf);
    OutputCommitter committer = outputFormat.getOutputCommitter(tContext);
    committer.commitTask(tContext);
}
Also used : OutputCommitter(org.apache.hadoop.mapreduce.OutputCommitter) RecordWriter(org.apache.hadoop.mapreduce.RecordWriter) TextOutputFormat(org.apache.hadoop.mapreduce.lib.output.TextOutputFormat) TaskAttemptContextImpl(org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl) OutputFormat(org.apache.hadoop.mapreduce.OutputFormat) FileOutputFormat(org.apache.hadoop.mapreduce.lib.output.FileOutputFormat) TaskAttemptContext(org.apache.hadoop.mapreduce.TaskAttemptContext) NullWritable(org.apache.hadoop.io.NullWritable)
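For orientation: NullWritable is a zero-byte singleton Writable obtained via NullWritable.get(), which is why the test above can pass it (or a plain null) wherever TextOutputFormat should skip the key or the value. A minimal sketch of the more common pattern, using it as the value class of a SequenceFile so that only keys are stored (the helper name, path and key strings are assumptions, not taken from the test above):

private void writeKeysOnly(Configuration conf, Path out) throws IOException {
    // Uses org.apache.hadoop.io.SequenceFile, Text and NullWritable; "out" is a hypothetical path.
    try (SequenceFile.Writer writer = SequenceFile.createWriter(conf,
            SequenceFile.Writer.file(out),
            SequenceFile.Writer.keyClass(Text.class),
            SequenceFile.Writer.valueClass(NullWritable.class))) {
        // NullWritable.get() returns the shared singleton and serializes to zero bytes.
        writer.append(new Text("key1"), NullWritable.get());
        writer.append(new Text("key2"), NullWritable.get());
    }
}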

Example 17 with NullWritable

use of org.apache.hadoop.io.NullWritable in project hive by apache.

the class StreamingAssert method readRecords.

List<Record> readRecords() throws Exception {
    if (currentDeltas.isEmpty()) {
        throw new AssertionError("No data");
    }
    InputFormat<NullWritable, OrcStruct> inputFormat = new OrcInputFormat();
    JobConf job = new JobConf();
    // Point the reader at the partition directory and describe the table layout and schema.
    job.set("mapred.input.dir", partitionLocation.toString());
    job.set("bucket_count", Integer.toString(table.getSd().getNumBuckets()));
    job.set(IOConstants.SCHEMA_EVOLUTION_COLUMNS, "id,msg");
    job.set(IOConstants.SCHEMA_EVOLUTION_COLUMNS_TYPES, "bigint:string");
    // Enable ACID (transactional) scanning and pass the list of valid transactions.
    job.set(ConfVars.HIVE_TRANSACTIONAL_TABLE_SCAN.varname, "true");
    job.set(ValidTxnList.VALID_TXNS_KEY, txns.toString());
    InputSplit[] splits = inputFormat.getSplits(job, 1);
    assertEquals(1, splits.length);
    final AcidRecordReader<NullWritable, OrcStruct> recordReader = (AcidRecordReader<NullWritable, OrcStruct>) inputFormat.getRecordReader(splits[0], job, Reporter.NULL);
    NullWritable key = recordReader.createKey();
    OrcStruct value = recordReader.createValue();
    List<Record> records = new ArrayList<>();
    while (recordReader.next(key, value)) {
        RecordIdentifier recordIdentifier = recordReader.getRecordIdentifier();
        Record record = new Record(new RecordIdentifier(recordIdentifier.getTransactionId(), recordIdentifier.getBucketId(), recordIdentifier.getRowId()), value.toString());
        System.out.println(record);
        records.add(record);
    }
    recordReader.close();
    return records;
}
Also used : ArrayList(java.util.ArrayList) AcidRecordReader(org.apache.hadoop.hive.ql.io.AcidInputFormat.AcidRecordReader) NullWritable(org.apache.hadoop.io.NullWritable) RecordIdentifier(org.apache.hadoop.hive.ql.io.RecordIdentifier) OrcStruct(org.apache.hadoop.hive.ql.io.orc.OrcStruct) OrcInputFormat(org.apache.hadoop.hive.ql.io.orc.OrcInputFormat) JobConf(org.apache.hadoop.mapred.JobConf) InputSplit(org.apache.hadoop.mapred.InputSplit)
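The helper above returns one Record per row yielded by the ACID-aware reader, each carrying the ORC row text plus its RecordIdentifier (transaction id, bucket id, row id). A hypothetical call site inside a test body (the expected count is a made-up value):

List<Record> records = readRecords();
assertEquals(2, records.size());  // expected row count is specific to the data the test streamed in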

Example 18 with NullWritable

use of org.apache.hadoop.io.NullWritable in project trevni by cutting.

the class AvroTrevniInputFormat method getRecordReader.

@Override
public RecordReader<AvroWrapper<T>, NullWritable> getRecordReader(InputSplit split, final JobConf job, Reporter reporter) throws IOException {
    final FileSplit file = (FileSplit) split;
    reporter.setStatus(file.toString());
    final AvroColumnReader.Params params = new AvroColumnReader.Params(new HadoopInput(file.getPath(), job));
    params.setModel(ReflectData.get());
    if (job.get(AvroJob.INPUT_SCHEMA) != null)
        params.setSchema(AvroJob.getInputSchema(job));
    return new RecordReader<AvroWrapper<T>, NullWritable>() {

        private AvroColumnReader<T> reader = new AvroColumnReader<T>(params);

        private float rows = reader.getRowCount();  // kept as a float so getProgress() returns a fraction

        private long row;

        public AvroWrapper<T> createKey() {
            return new AvroWrapper<T>(null);
        }

        public NullWritable createValue() {
            return NullWritable.get();
        }

        public boolean next(AvroWrapper<T> wrapper, NullWritable ignore) throws IOException {
            if (!reader.hasNext())
                return false;
            wrapper.datum(reader.next());
            row++;
            return true;
        }

        public float getProgress() throws IOException {
            return row / rows;
        }

        public long getPos() throws IOException {
            return row;
        }

        public void close() throws IOException {
            reader.close();
        }
    };
}
Also used : RecordReader(org.apache.hadoop.mapred.RecordReader) AvroWrapper(org.apache.avro.mapred.AvroWrapper) FileSplit(org.apache.hadoop.mapred.FileSplit) NullWritable(org.apache.hadoop.io.NullWritable)
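A sketch of plugging this format into an old-API (org.apache.hadoop.mapred) job driver. MyDriver, Event and the input path are assumptions; setInputFormat, FileInputFormat.addInputPath and AvroJob.setInputSchema are the standard mapred / org.apache.avro.mapred calls:

JobConf job = new JobConf(MyDriver.class);
job.setInputFormat(AvroTrevniInputFormat.class);
FileInputFormat.addInputPath(job, new Path("/data/events.trv"));  // hypothetical Trevni input
// Optional: request a reader schema; otherwise the file's writer schema is used
// (see the AvroJob.INPUT_SCHEMA check in getRecordReader above).
AvroJob.setInputSchema(job, ReflectData.get().getSchema(Event.class));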

Example 19 with NullWritable

use of org.apache.hadoop.io.NullWritable in project trevni by cutting.

the class AvroTrevniOutputFormat method getRecordWriter.

@Override
public RecordWriter<AvroWrapper<T>, NullWritable> getRecordWriter(FileSystem ignore, final JobConf job, final String name, Progressable prog) throws IOException {
    boolean isMapOnly = job.getNumReduceTasks() == 0;
    final Schema schema = isMapOnly ? AvroJob.getMapOutputSchema(job) : AvroJob.getOutputSchema(job);
    final ColumnFileMetaData meta = new ColumnFileMetaData();
    for (Map.Entry<String, String> e : job)
        if (e.getKey().startsWith(META_PREFIX))
            meta.put(e.getKey().substring(AvroJob.TEXT_PREFIX.length()), e.getValue().getBytes(MetaData.UTF8));
    final Path dir = FileOutputFormat.getTaskOutputPath(job, name);
    final FileSystem fs = dir.getFileSystem(job);
    if (!fs.mkdirs(dir))
        throw new IOException("Failed to create directory: " + dir);
    final long blockSize = fs.getDefaultBlockSize();
    return new RecordWriter<AvroWrapper<T>, NullWritable>() {

        private int part = 0;

        private AvroColumnWriter<T> writer = new AvroColumnWriter<T>(schema, meta, ReflectData.get());

        private void flush() throws IOException {
            OutputStream out = fs.create(new Path(dir, "part-" + (part++) + EXT));
            try {
                writer.writeTo(out);
            } finally {
                out.close();
            }
            writer = new AvroColumnWriter<T>(schema, meta, ReflectData.get());
        }

        public void write(AvroWrapper<T> wrapper, NullWritable ignore) throws IOException {
            writer.write(wrapper.datum());
            if (writer.sizeEstimate() >= blockSize) // block full
                flush();
        }

        public void close(Reporter reporter) throws IOException {
            flush();
        }
    };
}
Also used : Path(org.apache.hadoop.fs.Path) Schema(org.apache.avro.Schema) OutputStream(java.io.OutputStream) Reporter(org.apache.hadoop.mapred.Reporter) IOException(java.io.IOException) NullWritable(org.apache.hadoop.io.NullWritable) RecordWriter(org.apache.hadoop.mapred.RecordWriter) ColumnFileMetaData(org.apache.trevni.ColumnFileMetaData) FileSystem(org.apache.hadoop.fs.FileSystem) AvroWrapper(org.apache.avro.mapred.AvroWrapper) Map(java.util.Map)
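The matching output-side wiring, again as a sketch (MyDriver, Event and the output path are assumptions). AvroTrevniOutputFormat pulls its schema from the JobConf via AvroJob, as the code above shows:

JobConf job = new JobConf(MyDriver.class);
job.setOutputFormat(AvroTrevniOutputFormat.class);
FileOutputFormat.setOutputPath(job, new Path("/out/events-trevni"));  // hypothetical output dir
AvroJob.setOutputSchema(job, ReflectData.get().getSchema(Event.class));
// Any job property whose key starts with META_PREFIX is copied into the column-file metadata
// by the loop at the top of getRecordWriter above.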

Example 20 with NullWritable

use of org.apache.hadoop.io.NullWritable in project crunch by cloudera.

the class AvroOutputFormat method getRecordWriter.

@Override
public RecordWriter<AvroWrapper<T>, NullWritable> getRecordWriter(TaskAttemptContext context) throws IOException, InterruptedException {
    Configuration conf = context.getConfiguration();
    Schema schema = null;
    String outputName = conf.get("crunch.namedoutput");
    if (outputName != null && !outputName.isEmpty()) {
        schema = (new Schema.Parser()).parse(conf.get("avro.output.schema." + outputName));
    } else {
        schema = AvroJob.getOutputSchema(context.getConfiguration());
    }
    ReflectDataFactory factory = Avros.getReflectDataFactory(conf);
    final DataFileWriter<T> WRITER = new DataFileWriter<T>(factory.<T>getWriter());
    Path path = getDefaultWorkFile(context, org.apache.avro.mapred.AvroOutputFormat.EXT);
    WRITER.create(schema, path.getFileSystem(context.getConfiguration()).create(path));
    return new RecordWriter<AvroWrapper<T>, NullWritable>() {

        @Override
        public void write(AvroWrapper<T> wrapper, NullWritable ignore) throws IOException {
            WRITER.append(wrapper.datum());
        }

        @Override
        public void close(TaskAttemptContext context) throws IOException, InterruptedException {
            WRITER.close();
        }
    };
}
Also used : Path(org.apache.hadoop.fs.Path) Configuration(org.apache.hadoop.conf.Configuration) Schema(org.apache.avro.Schema) DataFileWriter(org.apache.avro.file.DataFileWriter) TaskAttemptContext(org.apache.hadoop.mapreduce.TaskAttemptContext) NullWritable(org.apache.hadoop.io.NullWritable) RecordWriter(org.apache.hadoop.mapreduce.RecordWriter) AvroWrapper(org.apache.avro.mapred.AvroWrapper)
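The named-output branch above reads its schema from the configuration key "avro.output.schema." + outputName, so a caller must populate that key first. A sketch of that setup (the Job instance, output name and errorSchema are assumptions; the key strings are taken from the code above):

Configuration conf = job.getConfiguration();  // job is a hypothetical org.apache.hadoop.mapreduce.Job
conf.set("crunch.namedoutput", "errors");
conf.set("avro.output.schema.errors", errorSchema.toString());  // Schema.toString() emits the JSON schema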

Aggregations

NullWritable (org.apache.hadoop.io.NullWritable) 113
Test (org.junit.Test) 68
Path (org.apache.hadoop.fs.Path) 47
Configuration (org.apache.hadoop.conf.Configuration) 44
File (java.io.File) 33
FileSystem (org.apache.hadoop.fs.FileSystem) 28
SequenceFile (org.apache.hadoop.io.SequenceFile) 24
JobConf (org.apache.hadoop.mapred.JobConf) 24
RouteBuilder (org.apache.camel.builder.RouteBuilder) 18
MockEndpoint (org.apache.camel.component.mock.MockEndpoint) 18
ArrayFile (org.apache.hadoop.io.ArrayFile) 18
Text (org.apache.hadoop.io.Text) 17
InputSplit (org.apache.hadoop.mapred.InputSplit) 17
LongWritable (org.apache.hadoop.io.LongWritable) 16
IntWritable (org.apache.hadoop.io.IntWritable) 11
IOException (java.io.IOException) 10
Writer (org.apache.hadoop.io.SequenceFile.Writer) 9
TaskAttemptContextImpl (org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl) 8
Pair (org.apache.hadoop.mrunit.types.Pair) 8
CharacteristicSetWritable (org.apache.jena.hadoop.rdf.types.CharacteristicSetWritable) 8