Example 1 with RecordWriter

Use of org.apache.hadoop.mapred.RecordWriter in project hive by apache.

From the class TestInputOutputFormat, method testMROutput:

@Test
public void testMROutput() throws Exception {
    Properties properties = new Properties();
    StructObjectInspector inspector;
    synchronized (TestOrcFile.class) {
        inspector = (StructObjectInspector) ObjectInspectorFactory.getReflectionObjectInspector(NestedRow.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
    }
    AbstractSerDe serde = new OrcSerde();
    OutputFormat<?, ?> outFormat = new OrcOutputFormat();
    // Write three NestedRow records through the mapred OutputFormat API.
    RecordWriter writer = outFormat.getRecordWriter(fs, conf, testFilePath.toString(), Reporter.NULL);
    writer.write(NullWritable.get(), serde.serialize(new NestedRow(1, 2, 3), inspector));
    writer.write(NullWritable.get(), serde.serialize(new NestedRow(4, 5, 6), inspector));
    writer.write(NullWritable.get(), serde.serialize(new NestedRow(7, 8, 9), inspector));
    writer.close(Reporter.NULL);
    // Re-initialize the SerDe with the reader-side schema: z int, r struct<x:int,y:int>.
    serde = new OrcSerde();
    properties.setProperty("columns", "z,r");
    properties.setProperty("columns.types", "int:struct<x:int,y:int>");
    SerDeUtils.initializeSerDe(serde, conf, properties, null);
    inspector = (StructObjectInspector) serde.getObjectInspector();
    InputFormat<?, ?> in = new OrcInputFormat();
    FileInputFormat.setInputPaths(conf, testFilePath.toString());
    InputSplit[] splits = in.getSplits(conf, 1);
    assertEquals(1, splits.length);
    // Project only column 1 (the struct r); the unprojected column 0 (z) reads back as null.
    ColumnProjectionUtils.appendReadColumns(conf, Collections.singletonList(1));
    conf.set("columns", "z,r");
    conf.set("columns.types", "int:struct<x:int,y:int>");
    org.apache.hadoop.mapred.RecordReader reader = in.getRecordReader(splits[0], conf, Reporter.NULL);
    Object key = reader.createKey();
    Object value = reader.createValue();
    int rowNum = 0;
    List<? extends StructField> fields = inspector.getAllStructFieldRefs();
    StructObjectInspector inner = (StructObjectInspector) fields.get(1).getFieldObjectInspector();
    List<? extends StructField> inFields = inner.getAllStructFieldRefs();
    IntObjectInspector intInspector = (IntObjectInspector) fields.get(0).getFieldObjectInspector();
    while (reader.next(key, value)) {
        assertEquals(null, inspector.getStructFieldData(value, fields.get(0)));
        Object sub = inspector.getStructFieldData(value, fields.get(1));
        assertEquals(3 * rowNum + 1, intInspector.get(inner.getStructFieldData(sub, inFields.get(0))));
        assertEquals(3 * rowNum + 2, intInspector.get(inner.getStructFieldData(sub, inFields.get(1))));
        rowNum += 1;
    }
    assertEquals(3, rowNum);
    reader.close();
}
Also used : IntObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.primitive.IntObjectInspector), Properties (java.util.Properties), AbstractSerDe (org.apache.hadoop.hive.serde2.AbstractSerDe), RecordWriter (org.apache.hadoop.mapred.RecordWriter), InputSplit (org.apache.hadoop.mapred.InputSplit), StructObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector), Test (org.junit.Test)
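
The writer and reader above depend on conf, fs, and testFilePath, which are fields initialized elsewhere in TestInputOutputFormat. A minimal, hypothetical stand-in for that fixture (class name, filesystem choice, and path are illustrative, not the actual setup):

import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapred.JobConf;

// Hypothetical fixture: a local filesystem and a scratch file for the ORC output.
class OrcTestFixtureSketch {
    public static void main(String[] args) throws Exception {
        JobConf conf = new JobConf();
        FileSystem fs = FileSystem.getLocal(conf);
        Path testFilePath = new Path(System.getProperty("java.io.tmpdir"),
                "TestInputOutputFormat.testMROutput.orc");
        fs.delete(testFilePath, false);  // start clean; getRecordWriter creates the file
    }
}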

Example 2 with RecordWriter

Use of org.apache.hadoop.mapred.RecordWriter in project hive by apache.

From the class RCFileOutputFormat, method getRecordWriter:

/** {@inheritDoc} */
@Override
public RecordWriter<WritableComparable, BytesRefArrayWritable> getRecordWriter(FileSystem ignored, JobConf job, String name, Progressable progress) throws IOException {
    Path outputPath = getWorkOutputPath(job);
    FileSystem fs = outputPath.getFileSystem(job);
    Path file = new Path(outputPath, name);
    CompressionCodec codec = null;
    if (getCompressOutput(job)) {
        Class<?> codecClass = getOutputCompressorClass(job, DefaultCodec.class);
        codec = (CompressionCodec) ReflectionUtils.newInstance(codecClass, job);
    }
    final RCFile.Writer out = new RCFile.Writer(fs, job, file, progress, codec);
    return new RecordWriter<WritableComparable, BytesRefArrayWritable>() {

        @Override
        public void close(Reporter reporter) throws IOException {
            out.close();
        }

        @Override
        public void write(WritableComparable key, BytesRefArrayWritable value) throws IOException {
            out.append(value);
        }
    };
}
Also used : Path (org.apache.hadoop.fs.Path), RecordWriter (org.apache.hadoop.mapred.RecordWriter), BytesRefArrayWritable (org.apache.hadoop.hive.serde2.columnar.BytesRefArrayWritable), WritableComparable (org.apache.hadoop.io.WritableComparable), FileSystem (org.apache.hadoop.fs.FileSystem), Reporter (org.apache.hadoop.mapred.Reporter), CompressionCodec (org.apache.hadoop.io.compress.CompressionCodec)
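
Whether the returned writer compresses is decided by the standard mapred output-compression settings that getCompressOutput and getOutputCompressorClass read back. A minimal driver-side sketch (class name is illustrative; setColumnNumber is RCFileOutputFormat's own static helper, assumed available in this Hive version):

import org.apache.hadoop.hive.ql.io.RCFileOutputFormat;
import org.apache.hadoop.io.compress.GzipCodec;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobConf;

// Hypothetical driver setup for an RCFile-producing job.
class RCFileDriverSketch {
    public static void main(String[] args) {
        JobConf job = new JobConf();
        job.setOutputFormat(RCFileOutputFormat.class);
        // getCompressOutput/getOutputCompressorClass above read these back
        FileOutputFormat.setCompressOutput(job, true);
        FileOutputFormat.setOutputCompressorClass(job, GzipCodec.class);
        // RCFile needs the column count up front (value here is illustrative)
        RCFileOutputFormat.setColumnNumber(job, 4);
    }
}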

Example 3 with RecordWriter

Use of org.apache.hadoop.mapred.RecordWriter in project hive by apache.

From the class FileRecordWriterContainer, method write:

@Override
public void write(WritableComparable<?> key, HCatRecord value) throws IOException, InterruptedException {
    LocalFileWriter localFileWriter = getLocalFileWriter(value);
    RecordWriter localWriter = localFileWriter.getLocalWriter();
    ObjectInspector localObjectInspector = localFileWriter.getLocalObjectInspector();
    AbstractSerDe localSerDe = localFileWriter.getLocalSerDe();
    OutputJobInfo localJobInfo = localFileWriter.getLocalJobInfo();
    // Strip partition columns; their values are carried by the partition path, not the file.
    for (Integer colToDel : partColsToDel) {
        value.remove(colToDel);
    }
    // The key given by user is ignored
    try {
        localWriter.write(NullWritable.get(), localSerDe.serialize(value.getAll(), localObjectInspector));
    } catch (SerDeException e) {
        throw new IOException("Failed to serialize object", e);
    }
}
Also used : ObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector), RecordWriter (org.apache.hadoop.mapred.RecordWriter), IOException (java.io.IOException), AbstractSerDe (org.apache.hadoop.hive.serde2.AbstractSerDe), SerDeException (org.apache.hadoop.hive.serde2.SerDeException)
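
HCatRecord.remove takes a position, so each removal in the loop above shifts the indices of later columns; partColsToDel is assumed to be ordered to account for that. If you mirror the pattern on a plain java.util.List, the remove(int)/remove(Object) overloads add a second trap. A minimal, hypothetical sketch (not HCatalog code):

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Comparator;
import java.util.List;

// Hypothetical helper: delete columns by position, highest index first,
// so earlier removals do not shift the positions of later ones.
class ColumnDropper {
    static void dropColumns(List<Object> row, List<Integer> colsToDrop) {
        colsToDrop.stream()
                .sorted(Comparator.reverseOrder())  // highest index first: no shifting
                .forEach(i -> row.remove((int) i)); // cast selects remove(int index), not remove(Object)
    }

    public static void main(String[] args) {
        List<Object> row = new ArrayList<>(Arrays.asList("a", "b", "c", "d"));
        dropColumns(row, Arrays.asList(1, 3));
        System.out.println(row);  // prints [a, c]
    }
}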

Example 4 with RecordWriter

Use of org.apache.hadoop.mapred.RecordWriter in project trevni by cutting.

From the class AvroTrevniOutputFormat, method getRecordWriter:

@Override
public RecordWriter<AvroWrapper<T>, NullWritable> getRecordWriter(FileSystem ignore, final JobConf job, final String name, Progressable prog) throws IOException {
    boolean isMapOnly = job.getNumReduceTasks() == 0;
    final Schema schema = isMapOnly ? AvroJob.getMapOutputSchema(job) : AvroJob.getOutputSchema(job);
    final ColumnFileMetaData meta = new ColumnFileMetaData();
    // Copy job properties carrying the META_PREFIX prefix into the Trevni file metadata.
    for (Map.Entry<String, String> e : job) {
        if (e.getKey().startsWith(META_PREFIX)) {
            meta.put(e.getKey().substring(META_PREFIX.length()), e.getValue().getBytes(MetaData.UTF8));
        }
    }
    final Path dir = FileOutputFormat.getTaskOutputPath(job, name);
    final FileSystem fs = dir.getFileSystem(job);
    if (!fs.mkdirs(dir))
        throw new IOException("Failed to create directory: " + dir);
    final long blockSize = fs.getDefaultBlockSize();
    return new RecordWriter<AvroWrapper<T>, NullWritable>() {

        private int part = 0;

        private AvroColumnWriter<T> writer = new AvroColumnWriter<T>(schema, meta, ReflectData.get());

        private void flush() throws IOException {
            OutputStream out = fs.create(new Path(dir, "part-" + (part++) + EXT));
            try {
                writer.writeTo(out);
            } finally {
                out.close();
            }
            writer = new AvroColumnWriter<T>(schema, meta, ReflectData.get());
        }

        public void write(AvroWrapper<T> wrapper, NullWritable ignore) throws IOException {
            writer.write(wrapper.datum());
            if (writer.sizeEstimate() >= blockSize) {
                // block full: spill the buffered rows to a new part file
                flush();
            }
        }

        public void close(Reporter reporter) throws IOException {
            flush();
        }
    };
}
Also used : Path (org.apache.hadoop.fs.Path), Schema (org.apache.avro.Schema), OutputStream (java.io.OutputStream), Reporter (org.apache.hadoop.mapred.Reporter), IOException (java.io.IOException), NullWritable (org.apache.hadoop.io.NullWritable), RecordWriter (org.apache.hadoop.mapred.RecordWriter), ColumnFileMetaData (org.apache.trevni.ColumnFileMetaData), FileSystem (org.apache.hadoop.fs.FileSystem), AvroWrapper (org.apache.avro.mapred.AvroWrapper), Map (java.util.Map)
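
Any job property whose key carries the META_PREFIX prefix ends up as metadata in every part file the writer produces. A hedged driver-side sketch; the literal "trevni.meta." value for META_PREFIX is an assumption, so prefer a setMeta-style helper on the class if one exists:

import org.apache.avro.Schema;
import org.apache.avro.mapred.AvroJob;
import org.apache.hadoop.mapred.JobConf;

// Hypothetical driver setup for a map-only Trevni-producing job.
class TrevniDriverSketch {
    public static void main(String[] args) {
        JobConf job = new JobConf();
        job.setNumReduceTasks(0);  // map-only: schema comes from AvroJob.getMapOutputSchema
        AvroJob.setMapOutputSchema(job, Schema.create(Schema.Type.STRING));
        // assumed prefix; picked up by the META_PREFIX scan in getRecordWriter above
        job.set("trevni.meta.created.by", "example-job");
    }
}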

Example 5 with RecordWriter

Use of org.apache.hadoop.mapred.RecordWriter in project hbase by apache.

From the class TestTableOutputFormatConnectionExhaust, method openCloseTableOutputFormat:

/**
   * Open and close a TableOutputFormat. Closing the RecordWriter should release the HBase
   * connection (ZK) resources, and will throw an exception if they are exhausted.
   */
static void openCloseTableOutputFormat(int iter) throws IOException {
    LOG.info("Instantiating TableOutputFormat connection  " + iter);
    JobConf conf = new JobConf();
    conf.addResource(UTIL.getConfiguration());
    conf.set(TableOutputFormat.OUTPUT_TABLE, TABLE);
    TableMapReduceUtil.initTableMapJob(TABLE, FAMILY, TableMap.class, ImmutableBytesWritable.class, ImmutableBytesWritable.class, conf);
    TableOutputFormat tof = new TableOutputFormat();
    RecordWriter rw = tof.getRecordWriter(null, conf, TABLE, null);
    rw.close(null);
}
Also used : RecordWriter (org.apache.hadoop.mapred.RecordWriter), JobConf (org.apache.hadoop.mapred.JobConf)
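
The enclosing test presumably drives this helper in a loop, so that a ZooKeeper connection leaked per iteration would eventually exhaust the pool and surface as an exception. A hedged sketch of such a test body (method name and iteration count are illustrative, not the test's actual constants):

@Test
public void testConnectionExhaustion() throws IOException {
    // each iteration opens and closes one TableOutputFormat; a leak per
    // iteration would exhaust ZK connections and make a later call throw
    for (int i = 0; i < 50; i++) {
        openCloseTableOutputFormat(i);
    }
}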

Aggregations

RecordWriter (org.apache.hadoop.mapred.RecordWriter): 9 usages
AbstractSerDe (org.apache.hadoop.hive.serde2.AbstractSerDe): 5 usages
IOException (java.io.IOException): 3 usages
Properties (java.util.Properties): 3 usages
FileSystem (org.apache.hadoop.fs.FileSystem): 3 usages
Path (org.apache.hadoop.fs.Path): 3 usages
StructObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector): 3 usages
InputSplit (org.apache.hadoop.mapred.InputSplit): 3 usages
Reporter (org.apache.hadoop.mapred.Reporter): 3 usages
Test (org.junit.Test): 3 usages
SearchArgument (org.apache.hadoop.hive.ql.io.sarg.SearchArgument): 2 usages
SerDeException (org.apache.hadoop.hive.serde2.SerDeException): 2 usages
JobConf (org.apache.hadoop.mapred.JobConf): 2 usages
OutputStream (java.io.OutputStream): 1 usage
ArrayList (java.util.ArrayList): 1 usage
Map (java.util.Map): 1 usage
TreeMap (java.util.TreeMap): 1 usage
Schema (org.apache.avro.Schema): 1 usage
AvroWrapper (org.apache.avro.mapred.AvroWrapper): 1 usage
BytesRefArrayWritable (org.apache.hadoop.hive.serde2.columnar.BytesRefArrayWritable): 1 usage