Use of org.apache.hadoop.mapred.RecordWriter in project hive by apache.
The class TestInputOutputFormat, method testMROutput.
@Test
public void testMROutput() throws Exception {
  Properties properties = new Properties();
  StructObjectInspector inspector;
  synchronized (TestOrcFile.class) {
    inspector = (StructObjectInspector) ObjectInspectorFactory.getReflectionObjectInspector(
        NestedRow.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
  }
  AbstractSerDe serde = new OrcSerde();
  OutputFormat<?, ?> outFormat = new OrcOutputFormat();
  RecordWriter writer = outFormat.getRecordWriter(fs, conf, testFilePath.toString(), Reporter.NULL);
  writer.write(NullWritable.get(), serde.serialize(new NestedRow(1, 2, 3), inspector));
  writer.write(NullWritable.get(), serde.serialize(new NestedRow(4, 5, 6), inspector));
  writer.write(NullWritable.get(), serde.serialize(new NestedRow(7, 8, 9), inspector));
  writer.close(Reporter.NULL);
  serde = new OrcSerde();
  properties.setProperty("columns", "z,r");
  properties.setProperty("columns.types", "int:struct<x:int,y:int>");
  SerDeUtils.initializeSerDe(serde, conf, properties, null);
  inspector = (StructObjectInspector) serde.getObjectInspector();
  InputFormat<?, ?> in = new OrcInputFormat();
  FileInputFormat.setInputPaths(conf, testFilePath.toString());
  InputSplit[] splits = in.getSplits(conf, 1);
  assertEquals(1, splits.length);
  // Project only top-level column 1 (the struct "r"); column 0 ("z") is not read and comes back null.
  ColumnProjectionUtils.appendReadColumns(conf, Collections.singletonList(1));
  conf.set("columns", "z,r");
  conf.set("columns.types", "int:struct<x:int,y:int>");
  org.apache.hadoop.mapred.RecordReader reader = in.getRecordReader(splits[0], conf, Reporter.NULL);
  Object key = reader.createKey();
  Object value = reader.createValue();
  int rowNum = 0;
  List<? extends StructField> fields = inspector.getAllStructFieldRefs();
  StructObjectInspector inner = (StructObjectInspector) fields.get(1).getFieldObjectInspector();
  List<? extends StructField> inFields = inner.getAllStructFieldRefs();
  IntObjectInspector intInspector = (IntObjectInspector) fields.get(0).getFieldObjectInspector();
  while (reader.next(key, value)) {
    assertEquals(null, inspector.getStructFieldData(value, fields.get(0)));
    Object sub = inspector.getStructFieldData(value, fields.get(1));
    assertEquals(3 * rowNum + 1, intInspector.get(inner.getStructFieldData(sub, inFields.get(0))));
    assertEquals(3 * rowNum + 2, intInspector.get(inner.getStructFieldData(sub, inFields.get(1))));
    rowNum += 1;
  }
  assertEquals(3, rowNum);
  reader.close();
}
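The NestedRow helper used above is defined elsewhere in TestInputOutputFormat and is not shown on this page. A minimal sketch of the shape the test appears to assume, inferred from the declared layout columns=z,r and columns.types=int:struct<x:int,y:int>; the field names, ordering, and constructor here are assumptions, not the actual Hive source:

  // Hypothetical reconstruction, for illustration only: an int column "z" declared
  // before a struct column "r" with fields x and y, matching the assertions above.
  static class MyRow {
    int x;
    int y;
    MyRow(int x, int y) {
      this.x = x;
      this.y = y;
    }
  }

  static class NestedRow {
    int z;     // never checked by the test: only top-level column 1 ("r") is projected
    MyRow r;
    NestedRow(int x, int y, int z) {
      this.z = z;
      this.r = new MyRow(x, y);
    }
  }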
Use of org.apache.hadoop.mapred.RecordWriter in project hive by apache.
The class RCFileOutputFormat, method getRecordWriter.
/** {@inheritDoc} */
@Override
public RecordWriter<WritableComparable, BytesRefArrayWritable> getRecordWriter(FileSystem ignored, JobConf job, String name, Progressable progress) throws IOException {
  Path outputPath = getWorkOutputPath(job);
  FileSystem fs = outputPath.getFileSystem(job);
  Path file = new Path(outputPath, name);
  CompressionCodec codec = null;
  if (getCompressOutput(job)) {
    Class<?> codecClass = getOutputCompressorClass(job, DefaultCodec.class);
    codec = (CompressionCodec) ReflectionUtils.newInstance(codecClass, job);
  }
  final RCFile.Writer out = new RCFile.Writer(fs, job, file, progress, codec);
  return new RecordWriter<WritableComparable, BytesRefArrayWritable>() {
    @Override
    public void close(Reporter reporter) throws IOException {
      out.close();
    }

    @Override
    public void write(WritableComparable key, BytesRefArrayWritable value) throws IOException {
      // RCFile is value-only: the key is ignored and only the row's columns are appended.
      out.append(value);
    }
  };
}
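For context, a hedged sketch of how a task could hand rows to the writer returned above: each row is a BytesRefArrayWritable (org.apache.hadoop.hive.serde2.columnar) holding one serialized value per column. The helper name, the three-column layout, and the values are illustrative assumptions.

  // Illustrative only; assumes the JobConf was prepared for an RCFile sink,
  // e.g. with RCFileOutputFormat.setColumnNumber(conf, 3).
  static void writeOneRow(RecordWriter<WritableComparable, BytesRefArrayWritable> writer)
      throws IOException {
    BytesRefArrayWritable row = new BytesRefArrayWritable(3);
    row.set(0, new BytesRefWritable("1".getBytes(StandardCharsets.UTF_8)));
    row.set(1, new BytesRefWritable("alice".getBytes(StandardCharsets.UTF_8)));
    row.set(2, new BytesRefWritable("2023-01-01".getBytes(StandardCharsets.UTF_8)));
    // The key is ignored by the writer above; only the value is appended.
    writer.write(null, row);
  }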
Use of org.apache.hadoop.mapred.RecordWriter in project hive by apache.
The class FileRecordWriterContainer, method write.
@Override
public void write(WritableComparable<?> key, HCatRecord value) throws IOException, InterruptedException {
  LocalFileWriter localFileWriter = getLocalFileWriter(value);
  RecordWriter localWriter = localFileWriter.getLocalWriter();
  ObjectInspector localObjectInspector = localFileWriter.getLocalObjectInspector();
  AbstractSerDe localSerDe = localFileWriter.getLocalSerDe();
  OutputJobInfo localJobInfo = localFileWriter.getLocalJobInfo();
  // Strip the partition columns from the record before serializing.
  for (Integer colToDel : partColsToDel) {
    value.remove(colToDel);
  }
  // The key given by the user is ignored.
  try {
    localWriter.write(NullWritable.get(), localSerDe.serialize(value.getAll(), localObjectInspector));
  } catch (SerDeException e) {
    throw new IOException("Failed to serialize object", e);
  }
}
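As the comment notes, the key passed to write is ignored; only the HCatRecord is serialized. A hedged sketch of what a caller might pass in; the container variable, the two-column layout, and the values are illustrative assumptions.

  // Illustrative only: build a record and hand it to an already-initialized container.
  List<Object> columns = new ArrayList<>();
  columns.add(42);          // e.g. an int column
  columns.add("example");   // e.g. a string column
  HCatRecord record = new DefaultHCatRecord(columns);  // org.apache.hive.hcatalog.data
  recordWriterContainer.write(NullWritable.get(), record);  // key is ignored by write()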
Use of org.apache.hadoop.mapred.RecordWriter in project trevni by cutting.
The class AvroTrevniOutputFormat, method getRecordWriter.
@Override
public RecordWriter<AvroWrapper<T>, NullWritable> getRecordWriter(FileSystem ignore, final JobConf job, final String name, Progressable prog) throws IOException {
  boolean isMapOnly = job.getNumReduceTasks() == 0;
  final Schema schema = isMapOnly ? AvroJob.getMapOutputSchema(job) : AvroJob.getOutputSchema(job);
  final ColumnFileMetaData meta = new ColumnFileMetaData();
  for (Map.Entry<String, String> e : job)
    if (e.getKey().startsWith(META_PREFIX))
      meta.put(e.getKey().substring(AvroJob.TEXT_PREFIX.length()), e.getValue().getBytes(MetaData.UTF8));
  final Path dir = FileOutputFormat.getTaskOutputPath(job, name);
  final FileSystem fs = dir.getFileSystem(job);
  if (!fs.mkdirs(dir))
    throw new IOException("Failed to create directory: " + dir);
  final long blockSize = fs.getDefaultBlockSize();
  return new RecordWriter<AvroWrapper<T>, NullWritable>() {
    private int part = 0;
    private AvroColumnWriter<T> writer = new AvroColumnWriter<T>(schema, meta, ReflectData.get());

    private void flush() throws IOException {
      OutputStream out = fs.create(new Path(dir, "part-" + (part++) + EXT));
      try {
        writer.writeTo(out);
      } finally {
        out.close();
      }
      writer = new AvroColumnWriter<T>(schema, meta, ReflectData.get());
    }

    public void write(AvroWrapper<T> wrapper, NullWritable ignore) throws IOException {
      writer.write(wrapper.datum());
      if (writer.sizeEstimate() >= blockSize)  // block full
        flush();
    }

    public void close(Reporter reporter) throws IOException {
      flush();
    }
  };
}
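A hedged sketch of the job wiring this writer expects; the schema and output path are illustrative assumptions. The returned RecordWriter rolls to a new part file whenever the in-memory column buffer's size estimate reaches the filesystem's default block size.

  // Illustrative configuration only.
  JobConf job = new JobConf();
  job.setNumReduceTasks(0);                          // map-only, so the map output schema is used
  AvroJob.setMapOutputSchema(job, Schema.create(Schema.Type.STRING));
  job.setOutputFormat(AvroTrevniOutputFormat.class);
  FileOutputFormat.setOutputPath(job, new Path("/tmp/trevni-output"));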
Use of org.apache.hadoop.mapred.RecordWriter in project hbase by apache.
The class TestTableOutputFormatConnectionExhaust, method openCloseTableOutputFormat.
/**
 * Open and close a TableOutputFormat. Closing the RecordWriter should release the HBase
 * connection (ZK) resources, and will throw an exception if they are exhausted.
 */
static void openCloseTableOutputFormat(int iter) throws IOException {
  LOG.info("Instantiating TableOutputFormat connection " + iter);
  JobConf conf = new JobConf();
  conf.addResource(UTIL.getConfiguration());
  conf.set(TableOutputFormat.OUTPUT_TABLE, TABLE);
  TableMapReduceUtil.initTableMapJob(TABLE, FAMILY, TableMap.class, ImmutableBytesWritable.class,
      ImmutableBytesWritable.class, conf);
  TableOutputFormat tof = new TableOutputFormat();
  RecordWriter rw = tof.getRecordWriter(null, conf, TABLE, null);
  rw.close(null);
}
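Each snippet on this page ultimately implements or drives the same two-method contract: write(key, value) and close(reporter). As a reference point, a minimal hedged sketch of a RecordWriter that emits tab-separated key/value lines; the class name and stream handling are illustrative, not taken from any of the projects above.

  // Minimal illustrative RecordWriter: one tab-separated line per record.
  class TextLineRecordWriter<K, V> implements RecordWriter<K, V> {
    private final FSDataOutputStream out;

    TextLineRecordWriter(FSDataOutputStream out) {
      this.out = out;
    }

    @Override
    public void write(K key, V value) throws IOException {
      out.write((key + "\t" + value + "\n").getBytes(StandardCharsets.UTF_8));
    }

    @Override
    public void close(Reporter reporter) throws IOException {
      out.close();
    }
  }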