Use of org.apache.hadoop.io.NullWritable in project hadoop by apache.
The class TestRecovery, method writeOutput:
private void writeOutput(TaskAttempt attempt, Configuration conf) throws Exception {
  TaskAttemptContext tContext = new TaskAttemptContextImpl(conf, TypeConverter.fromYarn(attempt.getID()));
  TextOutputFormat<?, ?> theOutputFormat = new TextOutputFormat();
  RecordWriter theRecordWriter = theOutputFormat.getRecordWriter(tContext);
  NullWritable nullWritable = NullWritable.get();
  try {
    theRecordWriter.write(key1, val1);
    theRecordWriter.write(null, nullWritable);
    theRecordWriter.write(null, val1);
    theRecordWriter.write(nullWritable, val2);
    theRecordWriter.write(key2, nullWritable);
    theRecordWriter.write(key1, null);
    theRecordWriter.write(null, null);
    theRecordWriter.write(key2, val2);
  } finally {
    theRecordWriter.close(tContext);
  }
  OutputFormat outputFormat = ReflectionUtils.newInstance(tContext.getOutputFormatClass(), conf);
  OutputCommitter committer = outputFormat.getOutputCommitter(tContext);
  committer.commitTask(tContext);
}
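The test deliberately writes every combination of real, null, and NullWritable keys and values, because TextOutputFormat emits only the non-null side of each pair and skips the line entirely when both sides are null or NullWritable. A minimal standalone sketch (not part of the test above, class name NullWritableDemo is made up for illustration) shows why NullWritable is a safe placeholder: it is a zero-byte singleton.

import org.apache.hadoop.io.DataOutputBuffer;
import org.apache.hadoop.io.NullWritable;

public class NullWritableDemo {
  public static void main(String[] args) throws Exception {
    NullWritable a = NullWritable.get();
    NullWritable b = NullWritable.get();
    System.out.println(a == b);          // true: get() always returns the same instance
    System.out.println(a.compareTo(b));  // 0: every NullWritable is equal to every other
    DataOutputBuffer out = new DataOutputBuffer();
    a.write(out);                        // serializes to nothing
    System.out.println(out.getLength()); // 0 bytes written
  }
}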
Use of org.apache.hadoop.io.NullWritable in project hive by apache.
The class StreamingAssert, method readRecords:
List<Record> readRecords() throws Exception {
  if (currentDeltas.isEmpty()) {
    throw new AssertionError("No data");
  }
  InputFormat<NullWritable, OrcStruct> inputFormat = new OrcInputFormat();
  JobConf job = new JobConf();
  job.set("mapred.input.dir", partitionLocation.toString());
  job.set("bucket_count", Integer.toString(table.getSd().getNumBuckets()));
  job.set(IOConstants.SCHEMA_EVOLUTION_COLUMNS, "id,msg");
  job.set(IOConstants.SCHEMA_EVOLUTION_COLUMNS_TYPES, "bigint:string");
  job.set(ConfVars.HIVE_TRANSACTIONAL_TABLE_SCAN.varname, "true");
  job.set(ValidTxnList.VALID_TXNS_KEY, txns.toString());
  InputSplit[] splits = inputFormat.getSplits(job, 1);
  assertEquals(1, splits.length);
  final AcidRecordReader<NullWritable, OrcStruct> recordReader =
      (AcidRecordReader<NullWritable, OrcStruct>) inputFormat.getRecordReader(splits[0], job, Reporter.NULL);
  NullWritable key = recordReader.createKey();
  OrcStruct value = recordReader.createValue();
  List<Record> records = new ArrayList<>();
  while (recordReader.next(key, value)) {
    RecordIdentifier recordIdentifier = recordReader.getRecordIdentifier();
    Record record = new Record(new RecordIdentifier(recordIdentifier.getTransactionId(),
        recordIdentifier.getBucketId(), recordIdentifier.getRowId()), value.toString());
    System.out.println(record);
    records.add(record);
  }
  recordReader.close();
  return records;
}
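Here the NullWritable key returned by createKey() is never inspected; all of the useful data comes back in the OrcStruct value and the reader's RecordIdentifier. For reference, a hypothetical Record holder along the lines of the one constructed above might look like the sketch below (the real StreamingAssert.Record in Hive may differ):

static class Record {
  private final RecordIdentifier recordIdentifier;
  private final String row;

  Record(RecordIdentifier recordIdentifier, String row) {
    this.recordIdentifier = recordIdentifier;
    this.row = row;
  }

  RecordIdentifier getRecordIdentifier() {
    return recordIdentifier;
  }

  String getRow() {
    return row;
  }

  @Override
  public String toString() {
    return recordIdentifier + " " + row;
  }
}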
Use of org.apache.hadoop.io.NullWritable in project trevni by cutting.
The class AvroTrevniInputFormat, method getRecordReader:
@Override
public RecordReader<AvroWrapper<T>, NullWritable> getRecordReader(InputSplit split, final JobConf job, Reporter reporter) throws IOException {
  final FileSplit file = (FileSplit) split;
  reporter.setStatus(file.toString());
  final AvroColumnReader.Params params = new AvroColumnReader.Params(new HadoopInput(file.getPath(), job));
  params.setModel(ReflectData.get());
  if (job.get(AvroJob.INPUT_SCHEMA) != null)
    params.setSchema(AvroJob.getInputSchema(job));
  return new RecordReader<AvroWrapper<T>, NullWritable>() {
    private AvroColumnReader<T> reader = new AvroColumnReader<T>(params);
    private float rows = reader.getRowCount();
    private long row;

    public AvroWrapper<T> createKey() {
      return new AvroWrapper<T>(null);
    }

    public NullWritable createValue() {
      return NullWritable.get();
    }

    public boolean next(AvroWrapper<T> wrapper, NullWritable ignore) throws IOException {
      if (!reader.hasNext())
        return false;
      wrapper.datum(reader.next());
      row++;
      return true;
    }

    public float getProgress() throws IOException {
      return row / rows;
    }

    public long getPos() throws IOException {
      return row;
    }

    public void close() throws IOException {
      reader.close();
    }
  };
}
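A hedged usage sketch for wiring this input format into an old-API (mapred) job; the input path, the readerSchema variable, and the choice to set a projection schema are assumptions for illustration:

JobConf job = new JobConf();
job.setInputFormat(AvroTrevniInputFormat.class);
FileInputFormat.addInputPath(job, new Path("/data/events"));  // hypothetical path
// Optional: project to a reader schema; otherwise the file's own schema is used.
AvroJob.setInputSchema(job, readerSchema);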
Use of org.apache.hadoop.io.NullWritable in project trevni by cutting.
The class AvroTrevniOutputFormat, method getRecordWriter:
@Override
public RecordWriter<AvroWrapper<T>, NullWritable> getRecordWriter(FileSystem ignore, final JobConf job, final String name, Progressable prog) throws IOException {
  boolean isMapOnly = job.getNumReduceTasks() == 0;
  final Schema schema = isMapOnly ? AvroJob.getMapOutputSchema(job) : AvroJob.getOutputSchema(job);
  final ColumnFileMetaData meta = new ColumnFileMetaData();
  for (Map.Entry<String, String> e : job)
    if (e.getKey().startsWith(META_PREFIX))
      meta.put(e.getKey().substring(META_PREFIX.length()), e.getValue().getBytes(MetaData.UTF8));
  final Path dir = FileOutputFormat.getTaskOutputPath(job, name);
  final FileSystem fs = dir.getFileSystem(job);
  if (!fs.mkdirs(dir))
    throw new IOException("Failed to create directory: " + dir);
  final long blockSize = fs.getDefaultBlockSize();
  return new RecordWriter<AvroWrapper<T>, NullWritable>() {
    private int part = 0;
    private AvroColumnWriter<T> writer = new AvroColumnWriter<T>(schema, meta, ReflectData.get());

    private void flush() throws IOException {
      OutputStream out = fs.create(new Path(dir, "part-" + (part++) + EXT));
      try {
        writer.writeTo(out);
      } finally {
        out.close();
      }
      writer = new AvroColumnWriter<T>(schema, meta, ReflectData.get());
    }

    public void write(AvroWrapper<T> wrapper, NullWritable ignore) throws IOException {
      writer.write(wrapper.datum());
      if (writer.sizeEstimate() >= blockSize)  // block full
        flush();
    }

    public void close(Reporter reporter) throws IOException {
      flush();
    }
  };
}
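A hedged sketch of configuring a job to write through this output format; the metadata key and output path are assumptions, and it presumes an AvroTrevniOutputFormat.setMeta helper that stores values under META_PREFIX:

JobConf job = new JobConf();
job.setOutputFormat(AvroTrevniOutputFormat.class);
AvroJob.setOutputSchema(job, schema);                              // schema the column writer will use
FileOutputFormat.setOutputPath(job, new Path("/out/trevni"));      // hypothetical path
AvroTrevniOutputFormat.setMeta(job, "created.by", "example-job");  // copied into each column file's metadata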
Use of org.apache.hadoop.io.NullWritable in project crunch by cloudera.
The class AvroOutputFormat, method getRecordWriter:
@Override
public RecordWriter<AvroWrapper<T>, NullWritable> getRecordWriter(TaskAttemptContext context) throws IOException, InterruptedException {
  Configuration conf = context.getConfiguration();
  Schema schema = null;
  String outputName = conf.get("crunch.namedoutput");
  if (outputName != null && !outputName.isEmpty()) {
    schema = (new Schema.Parser()).parse(conf.get("avro.output.schema." + outputName));
  } else {
    schema = AvroJob.getOutputSchema(context.getConfiguration());
  }
  ReflectDataFactory factory = Avros.getReflectDataFactory(conf);
  final DataFileWriter<T> WRITER = new DataFileWriter<T>(factory.<T>getWriter());
  Path path = getDefaultWorkFile(context, org.apache.avro.mapred.AvroOutputFormat.EXT);
  WRITER.create(schema, path.getFileSystem(context.getConfiguration()).create(path));
  return new RecordWriter<AvroWrapper<T>, NullWritable>() {

    @Override
    public void write(AvroWrapper<T> wrapper, NullWritable ignore) throws IOException {
      WRITER.append(wrapper.datum());
    }

    @Override
    public void close(TaskAttemptContext context) throws IOException, InterruptedException {
      WRITER.close();
    }
  };
}
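A sketch of the configuration this method reads when output is routed to a named target; the output name "events" and the schema literal are assumptions for illustration:

Configuration conf = new Configuration();
conf.set("crunch.namedoutput", "events");
conf.set("avro.output.schema.events",
    "{\"type\":\"record\",\"name\":\"Event\",\"fields\":"
        + "[{\"name\":\"id\",\"type\":\"long\"}]}");
// With no named output set, the format falls back to AvroJob.getOutputSchema(conf).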