
Example 1 with RecordReader

Use of org.apache.hadoop.mapred.RecordReader in project hadoop by apache.

From the class DumpTypedBytes, method dumpTypedBytes.

/**
   * Dump given list of files to standard output as typed bytes.
   */
@SuppressWarnings("unchecked")
private int dumpTypedBytes(List<FileStatus> files) throws IOException {
    JobConf job = new JobConf(getConf());
    DataOutputStream dout = new DataOutputStream(System.out);
    AutoInputFormat autoInputFormat = new AutoInputFormat();
    for (FileStatus fileStatus : files) {
        FileSplit split = new FileSplit(fileStatus.getPath(), 0, fileStatus.getLen() * fileStatus.getBlockSize(), (String[]) null);
        RecordReader recReader = null;
        try {
            recReader = autoInputFormat.getRecordReader(split, job, Reporter.NULL);
            Object key = recReader.createKey();
            Object value = recReader.createValue();
            while (recReader.next(key, value)) {
                if (key instanceof Writable) {
                    TypedBytesWritableOutput.get(dout).write((Writable) key);
                } else {
                    TypedBytesOutput.get(dout).write(key);
                }
                if (value instanceof Writable) {
                    TypedBytesWritableOutput.get(dout).write((Writable) value);
                } else {
                    TypedBytesOutput.get(dout).write(value);
                }
            }
        } finally {
            if (recReader != null) {
                recReader.close();
            }
        }
    }
    dout.flush();
    return 0;
}
Also used: FileStatus (org.apache.hadoop.fs.FileStatus), DataOutputStream (java.io.DataOutputStream), RecordReader (org.apache.hadoop.mapred.RecordReader), Writable (org.apache.hadoop.io.Writable), FileSplit (org.apache.hadoop.mapred.FileSplit), JobConf (org.apache.hadoop.mapred.JobConf)
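
The createKey()/createValue()/next() loop above is the standard way to consume an old-API RecordReader. As a point of comparison, here is a minimal standalone sketch of the same loop over a plain TextInputFormat, with nothing typed-bytes specific; the class name OldApiReadLoop and the use of args[0] as the input path are illustrative assumptions, not part of the Hadoop source.

import java.io.IOException;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.InputSplit;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.RecordReader;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.mapred.TextInputFormat;

public class OldApiReadLoop {

    public static void main(String[] args) throws IOException {
        JobConf job = new JobConf();
        FileInputFormat.setInputPaths(job, new Path(args[0]));
        TextInputFormat format = new TextInputFormat();
        format.configure(job);
        for (InputSplit split : format.getSplits(job, 1)) {
            RecordReader<LongWritable, Text> reader = format.getRecordReader(split, job, Reporter.NULL);
            // The old API reuses one key and one value object; next() overwrites them in place.
            LongWritable key = reader.createKey();
            Text value = reader.createValue();
            try {
                while (reader.next(key, value)) {
                    System.out.println(key.get() + "\t" + value);
                }
            } finally {
                reader.close();
            }
        }
    }
}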

Example 2 with RecordReader

Use of org.apache.hadoop.mapred.RecordReader in project hadoop by apache.

From the class TestAutoInputFormat, method testFormat.

@SuppressWarnings({ "unchecked", "deprecation" })
@Test
public void testFormat() throws IOException {
    JobConf job = new JobConf(conf);
    FileSystem fs = FileSystem.getLocal(conf);
    Path dir = new Path(System.getProperty("test.build.data", ".") + "/mapred");
    Path txtFile = new Path(dir, "auto.txt");
    Path seqFile = new Path(dir, "auto.seq");
    fs.delete(dir, true);
    FileInputFormat.setInputPaths(job, dir);
    Writer txtWriter = new OutputStreamWriter(fs.create(txtFile));
    try {
        for (int i = 0; i < LINES_COUNT; i++) {
            txtWriter.write("" + (10 * i));
            txtWriter.write("\n");
        }
    } finally {
        txtWriter.close();
    }
    SequenceFile.Writer seqWriter = SequenceFile.createWriter(fs, conf, seqFile, IntWritable.class, LongWritable.class);
    try {
        for (int i = 0; i < RECORDS_COUNT; i++) {
            IntWritable key = new IntWritable(11 * i);
            LongWritable value = new LongWritable(12 * i);
            seqWriter.append(key, value);
        }
    } finally {
        seqWriter.close();
    }
    AutoInputFormat format = new AutoInputFormat();
    InputSplit[] splits = format.getSplits(job, SPLITS_COUNT);
    for (InputSplit split : splits) {
        RecordReader reader = format.getRecordReader(split, job, Reporter.NULL);
        Object key = reader.createKey();
        Object value = reader.createValue();
        try {
            while (reader.next(key, value)) {
                if (key instanceof LongWritable) {
                    assertEquals("Wrong value class.", Text.class, value.getClass());
                    assertTrue("Invalid value", Integer.parseInt(((Text) value).toString()) % 10 == 0);
                } else {
                    assertEquals("Wrong key class.", IntWritable.class, key.getClass());
                    assertEquals("Wrong value class.", LongWritable.class, value.getClass());
                    assertTrue("Invalid key.", ((IntWritable) key).get() % 11 == 0);
                    assertTrue("Invalid value.", ((LongWritable) value).get() % 12 == 0);
                }
            }
        } finally {
            reader.close();
        }
    }
}
Also used: Path (org.apache.hadoop.fs.Path), AutoInputFormat (org.apache.hadoop.streaming.AutoInputFormat), RecordReader (org.apache.hadoop.mapred.RecordReader), Text (org.apache.hadoop.io.Text), SequenceFile (org.apache.hadoop.io.SequenceFile), FileSystem (org.apache.hadoop.fs.FileSystem), OutputStreamWriter (java.io.OutputStreamWriter), LongWritable (org.apache.hadoop.io.LongWritable), JobConf (org.apache.hadoop.mapred.JobConf), InputSplit (org.apache.hadoop.mapred.InputSplit), Writer (java.io.Writer), IntWritable (org.apache.hadoop.io.IntWritable), Test (org.junit.Test)
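
The test depends on AutoInputFormat recognizing, per split, whether it is reading the text file or the SequenceFile. A SequenceFile always starts with the magic bytes 'S', 'E', 'Q', so a header check along the following lines is enough for that distinction; this FormatSniffer helper is an illustrative sketch, not the actual AutoInputFormat implementation.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class FormatSniffer {

    /** Returns true if the file begins with the SequenceFile magic header "SEQ". */
    public static boolean looksLikeSequenceFile(Configuration conf, Path file) throws IOException {
        FileSystem fs = file.getFileSystem(conf);
        try (FSDataInputStream in = fs.open(file)) {
            byte[] magic = new byte[3];
            int read = in.read(magic);
            return read == 3 && magic[0] == 'S' && magic[1] == 'E' && magic[2] == 'Q';
        }
    }
}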

Example 3 with RecordReader

Use of org.apache.hadoop.mapred.RecordReader in project hive by apache.

From the class HiveHBaseTableInputFormat, method getRecordReader.

@Override
public RecordReader<ImmutableBytesWritable, ResultWritable> getRecordReader(InputSplit split, JobConf jobConf, final Reporter reporter) throws IOException {
    HBaseSplit hbaseSplit = (HBaseSplit) split;
    TableSplit tableSplit = hbaseSplit.getTableSplit();
    if (conn == null) {
        conn = ConnectionFactory.createConnection(HBaseConfiguration.create(jobConf));
    }
    initializeTable(conn, tableSplit.getTable());
    setScan(HiveHBaseInputFormatUtil.getScan(jobConf));
    Job job = new Job(jobConf);
    TaskAttemptContext tac = ShimLoader.getHadoopShims().newTaskAttemptContext(job.getConfiguration(), reporter);
    final org.apache.hadoop.mapreduce.RecordReader<ImmutableBytesWritable, Result> recordReader = createRecordReader(tableSplit, tac);
    try {
        recordReader.initialize(tableSplit, tac);
    } catch (InterruptedException e) {
        // Free up the HTable connections
        closeTable();
        if (conn != null) {
            conn.close();
            conn = null;
        }
        throw new IOException("Failed to initialize RecordReader", e);
    }
    return new RecordReader<ImmutableBytesWritable, ResultWritable>() {

        @Override
        public void close() throws IOException {
            recordReader.close();
            closeTable();
            if (conn != null) {
                conn.close();
                conn = null;
            }
        }

        @Override
        public ImmutableBytesWritable createKey() {
            return new ImmutableBytesWritable();
        }

        @Override
        public ResultWritable createValue() {
            return new ResultWritable(new Result());
        }

        @Override
        public long getPos() throws IOException {
            return 0;
        }

        @Override
        public float getProgress() throws IOException {
            float progress = 0.0F;
            try {
                progress = recordReader.getProgress();
            } catch (InterruptedException e) {
                throw new IOException(e);
            }
            return progress;
        }

        @Override
        public boolean next(ImmutableBytesWritable rowKey, ResultWritable value) throws IOException {
            boolean next = false;
            try {
                next = recordReader.nextKeyValue();
                if (next) {
                    rowKey.set(recordReader.getCurrentValue().getRow());
                    value.setResult(recordReader.getCurrentValue());
                }
            } catch (InterruptedException e) {
                throw new IOException(e);
            }
            return next;
        }
    };
}
Also used: ImmutableBytesWritable (org.apache.hadoop.hbase.io.ImmutableBytesWritable), RecordReader (org.apache.hadoop.mapred.RecordReader), TaskAttemptContext (org.apache.hadoop.mapreduce.TaskAttemptContext), IOException (java.io.IOException), Result (org.apache.hadoop.hbase.client.Result), TableSplit (org.apache.hadoop.hbase.mapreduce.TableSplit), Job (org.apache.hadoop.mapreduce.Job)
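
The anonymous class above is one instance of a general pattern: exposing a new-API (org.apache.hadoop.mapreduce) RecordReader through the old-API (org.apache.hadoop.mapred) interface. A generic sketch of that adapter is shown below; it is hypothetical (the class name and the abstract copyCurrentInto hook are not from the Hive code) and simply generalizes what the example does with rowKey.set(...) and value.setResult(...).

import java.io.IOException;

import org.apache.hadoop.mapred.RecordReader;

public abstract class NewToOldRecordReaderAdapter<K, V> implements RecordReader<K, V> {

    protected final org.apache.hadoop.mapreduce.RecordReader<?, ?> delegate;

    protected NewToOldRecordReaderAdapter(org.apache.hadoop.mapreduce.RecordReader<?, ?> delegate) {
        this.delegate = delegate;
    }

    /**
     * Copies the delegate's current key/value into the caller-owned, reusable objects.
     * In the HBase example this is rowKey.set(...) and value.setResult(...).
     * createKey() and createValue() are likewise left to the concrete subclass.
     */
    protected abstract void copyCurrentInto(K key, V value) throws IOException, InterruptedException;

    @Override
    public boolean next(K key, V value) throws IOException {
        try {
            if (!delegate.nextKeyValue()) {
                return false;
            }
            copyCurrentInto(key, value);
            return true;
        } catch (InterruptedException e) {
            // Old-API callers only expect IOException, so wrap, as the example above does.
            throw new IOException(e);
        }
    }

    @Override
    public long getPos() throws IOException {
        // The new API exposes no byte position, so report a constant like the example above.
        return 0;
    }

    @Override
    public float getProgress() throws IOException {
        try {
            return delegate.getProgress();
        } catch (InterruptedException e) {
            throw new IOException(e);
        }
    }

    @Override
    public void close() throws IOException {
        delegate.close();
    }
}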

Example 4 with RecordReader

Use of org.apache.hadoop.mapred.RecordReader in project hive by apache.

From the class HiveContextAwareRecordReader, method initIOContext.

public void initIOContext(FileSplit split, JobConf job, Class inputFormatClass, RecordReader recordReader) throws IOException {
    boolean blockPointer = false;
    long blockStart = -1;
    FileSplit fileSplit = split;
    Path path = fileSplit.getPath();
    FileSystem fs = path.getFileSystem(job);
    if (inputFormatClass.getName().contains("SequenceFile")) {
        SequenceFile.Reader in = new SequenceFile.Reader(fs, path, job);
        blockPointer = in.isBlockCompressed();
        in.sync(fileSplit.getStart());
        blockStart = in.getPosition();
        in.close();
    } else if (recordReader instanceof RCFileRecordReader) {
        blockPointer = true;
        blockStart = ((RCFileRecordReader) recordReader).getStart();
    } else if (inputFormatClass.getName().contains("RCFile")) {
        blockPointer = true;
        RCFile.Reader in = new RCFile.Reader(fs, path, job);
        in.sync(fileSplit.getStart());
        blockStart = in.getPosition();
        in.close();
    }
    this.jobConf = job;
    this.initIOContext(blockStart, blockPointer, path.makeQualified(fs));
    this.initIOContextSortedProps(split, recordReader, job);
}
Also used: Path (org.apache.hadoop.fs.Path), SequenceFile (org.apache.hadoop.io.SequenceFile), FileSystem (org.apache.hadoop.fs.FileSystem), RecordReader (org.apache.hadoop.mapred.RecordReader), FileSplit (org.apache.hadoop.mapred.FileSplit)
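
For the SequenceFile branch, the essential step is seeking to the next sync marker past the split's start and recording that offset as blockStart. Pulled out into a standalone helper (the BlockStartProbe name is an assumption; the deprecated SequenceFile.Reader constructor matches the Hive code above), the idea looks like this:

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.SequenceFile;

public class BlockStartProbe {

    /** Returns the byte offset of the next sync point past splitStart. */
    public static long blockStartAfter(Configuration conf, Path file, long splitStart) throws IOException {
        FileSystem fs = file.getFileSystem(conf);
        SequenceFile.Reader in = new SequenceFile.Reader(fs, file, conf);
        try {
            // Skip forward to the next sync marker, then read back the position.
            in.sync(splitStart);
            return in.getPosition();
        } finally {
            in.close();
        }
    }
}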

Example 5 with RecordReader

Use of org.apache.hadoop.mapred.RecordReader in project hive by apache.

From the class SymlinkTextInputFormat, method getRecordReader.

@Override
public RecordReader<LongWritable, Text> getRecordReader(InputSplit split, JobConf job, Reporter reporter) throws IOException {
    InputSplit targetSplit = ((SymlinkTextInputSplit) split).getTargetSplit();
    // The target data is in TextInputFormat.
    TextInputFormat inputFormat = new TextInputFormat();
    inputFormat.configure(job);
    RecordReader innerReader = null;
    try {
        innerReader = inputFormat.getRecordReader(targetSplit, job, reporter);
    } catch (Exception e) {
        innerReader = HiveIOExceptionHandlerUtil.handleRecordReaderCreationException(e, job);
    }
    HiveRecordReader rr = new HiveRecordReader(innerReader, job);
    rr.initIOContext((FileSplit) targetSplit, job, TextInputFormat.class, innerReader);
    return rr;
}
Also used: TextInputFormat (org.apache.hadoop.mapred.TextInputFormat), RecordReader (org.apache.hadoop.mapred.RecordReader), InputSplit (org.apache.hadoop.mapred.InputSplit), IOException (java.io.IOException)
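
SymlinkTextInputFormat's splits do not point at the data directly: each split wraps a target split resolved from a "symlink" file whose lines list the paths of the real text data, and that target is then read with an ordinary TextInputFormat, as above. A small setup sketch for writing such a symlink file (the SymlinkInputSetup helper name is illustrative):

import java.io.IOException;
import java.io.OutputStreamWriter;
import java.io.Writer;
import java.nio.charset.StandardCharsets;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class SymlinkInputSetup {

    /** Writes a symlink file containing one target data path per line. */
    public static void writeSymlinkFile(Configuration conf, Path symlinkFile, Path... targets) throws IOException {
        FileSystem fs = symlinkFile.getFileSystem(conf);
        try (Writer out = new OutputStreamWriter(fs.create(symlinkFile), StandardCharsets.UTF_8)) {
            for (Path target : targets) {
                out.write(target.toString());
                out.write("\n");
            }
        }
    }
}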

Aggregations

RecordReader (org.apache.hadoop.mapred.RecordReader): 17 usages
Path (org.apache.hadoop.fs.Path): 9 usages
FileSplit (org.apache.hadoop.mapred.FileSplit): 8 usages
IOException (java.io.IOException): 7 usages
JobConf (org.apache.hadoop.mapred.JobConf): 7 usages
InputSplit (org.apache.hadoop.mapred.InputSplit): 6 usages
FileSystem (org.apache.hadoop.fs.FileSystem): 5 usages
InputFormat (org.apache.hadoop.mapred.InputFormat): 4 usages
Text (org.apache.hadoop.io.Text): 3 usages
Configuration (org.apache.hadoop.conf.Configuration): 2 usages
SerDeException (org.apache.hadoop.hive.serde2.SerDeException): 2 usages
StructObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector): 2 usages
SequenceFile (org.apache.hadoop.io.SequenceFile): 2 usages
Reporter (org.apache.hadoop.mapred.Reporter): 2 usages
PARTITION_KEY (com.facebook.presto.hive.HiveColumnHandle.ColumnType.PARTITION_KEY): 1 usage
REGULAR (com.facebook.presto.hive.HiveColumnHandle.ColumnType.REGULAR): 1 usage
HiveColumnHandle.bucketColumnHandle (com.facebook.presto.hive.HiveColumnHandle.bucketColumnHandle): 1 usage
HiveColumnHandle.isBucketColumnHandle (com.facebook.presto.hive.HiveColumnHandle.isBucketColumnHandle): 1 usage
HiveColumnHandle.isPathColumnHandle (com.facebook.presto.hive.HiveColumnHandle.isPathColumnHandle): 1 usage
HiveColumnHandle.pathColumnHandle (com.facebook.presto.hive.HiveColumnHandle.pathColumnHandle): 1 usage