
Example 16 with Reporter

use of org.apache.hadoop.mapred.Reporter in project hive by apache.

the class DynamicPartitionFileRecordWriterContainer method close.

@Override
public void close(TaskAttemptContext context) throws IOException, InterruptedException {
    Reporter reporter = InternalUtil.createReporter(context);
    for (RecordWriter<? super WritableComparable<?>, ? super Writable> bwriter : baseDynamicWriters.values()) {
        // We are in RecordWriter.close(), so it makes sense that the context
        // would be a TaskInputOutputContext.
        bwriter.close(reporter);
    }
    TaskCommitContextRegistry.getInstance().register(context, new TaskCommitContextRegistry.TaskCommitterProxy() {

        @Override
        public void abortTask(TaskAttemptContext context) throws IOException {
            for (Map.Entry<String, OutputJobInfo> outputJobInfoEntry : dynamicOutputJobInfo.entrySet()) {
                String dynKey = outputJobInfoEntry.getKey();
                OutputJobInfo outputJobInfo = outputJobInfoEntry.getValue();
                LOG.info("Aborting task-attempt for " + outputJobInfo.getLocation());
                baseDynamicCommitters.get(dynKey).abortTask(dynamicContexts.get(dynKey));
            }
        }

        @Override
        public void commitTask(TaskAttemptContext context) throws IOException {
            for (Map.Entry<String, OutputJobInfo> outputJobInfoEntry : dynamicOutputJobInfo.entrySet()) {
                String dynKey = outputJobInfoEntry.getKey();
                OutputJobInfo outputJobInfo = outputJobInfoEntry.getValue();
                LOG.info("Committing task-attempt for " + outputJobInfo.getLocation());
                TaskAttemptContext dynContext = dynamicContexts.get(dynKey);
                OutputCommitter dynCommitter = baseDynamicCommitters.get(dynKey);
                if (dynCommitter.needsTaskCommit(dynContext)) {
                    dynCommitter.commitTask(dynContext);
                } else {
                    LOG.info("Skipping commitTask() for " + outputJobInfo.getLocation());
                }
            }
        }
    });
}
Also used : FileOutputCommitter(org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter) OutputCommitter(org.apache.hadoop.mapreduce.OutputCommitter) Reporter(org.apache.hadoop.mapred.Reporter) TaskAttemptContext(org.apache.hadoop.mapreduce.TaskAttemptContext) IOException(java.io.IOException)

Example 17 with Reporter

use of org.apache.hadoop.mapred.Reporter in project hadoop by apache.

the class MultipleOutputFormat method getRecordWriter.

/**
   * Create a composite record writer that can write key/value data to different
   * output files
   * 
   * @param fs
   *          the file system to use
   * @param job
   *          the job conf for the job
   * @param name
   *          the leaf file name for the output file (such as "part-00000")
   * @param arg3
   *          a progressable for reporting progress.
   * @return a composite record writer
   * @throws IOException
   */
public RecordWriter<K, V> getRecordWriter(FileSystem fs, JobConf job, String name, Progressable arg3) throws IOException {
    final FileSystem myFS = fs;
    final String myName = generateLeafFileName(name);
    final JobConf myJob = job;
    final Progressable myProgressable = arg3;
    return new RecordWriter<K, V>() {

        // a cache storing the record writers for different output files.
        TreeMap<String, RecordWriter<K, V>> recordWriters = new TreeMap<String, RecordWriter<K, V>>();

        public void write(K key, V value) throws IOException {
            // get the file name based on the key
            String keyBasedPath = generateFileNameForKeyValue(key, value, myName);
            // get the file name based on the input file name
            String finalPath = getInputFileBasedOutputFileName(myJob, keyBasedPath);
            // get the actual key
            K actualKey = generateActualKey(key, value);
            V actualValue = generateActualValue(key, value);
            RecordWriter<K, V> rw = this.recordWriters.get(finalPath);
            if (rw == null) {
                // if we don't have the record writer yet for the final path, create
                // one
                // and add it to the cache
                rw = getBaseRecordWriter(myFS, myJob, finalPath, myProgressable);
                this.recordWriters.put(finalPath, rw);
            }
            rw.write(actualKey, actualValue);
        }

        public void close(Reporter reporter) throws IOException {
            Iterator<String> keys = this.recordWriters.keySet().iterator();
            while (keys.hasNext()) {
                RecordWriter<K, V> rw = this.recordWriters.get(keys.next());
                rw.close(reporter);
            }
            this.recordWriters.clear();
        }

    };
}
Also used : Progressable(org.apache.hadoop.util.Progressable) RecordWriter(org.apache.hadoop.mapred.RecordWriter) FileSystem(org.apache.hadoop.fs.FileSystem) Reporter(org.apache.hadoop.mapred.Reporter) TreeMap(java.util.TreeMap) JobConf(org.apache.hadoop.mapred.JobConf)
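
MultipleOutputFormat itself is abstract, so the composite writer above only handles caching and closing; the routing behaviour comes from a subclass. A minimal sketch of typical usage, assuming the stock MultipleTextOutputFormat base class and a hypothetical KeyPartitionedTextOutputFormat name, which overrides generateFileNameForKeyValue so each key is routed to its own output file:

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.lib.MultipleTextOutputFormat;

// Hypothetical subclass for illustration: routes each record into a
// subdirectory named after its key, keeping the usual part-xxxxx leaf name.
public class KeyPartitionedTextOutputFormat extends MultipleTextOutputFormat<Text, Text> {

    @Override
    protected String generateFileNameForKeyValue(Text key, Text value, String name) {
        // "name" is the leaf file name handed to getRecordWriter (e.g. part-00000);
        // the returned path is resolved under the job's output directory.
        return key.toString() + Path.SEPARATOR + name;
    }
}

Registered on the job with conf.setOutputFormat(KeyPartitionedTextOutputFormat.class), the record-writer cache shown above then holds one base writer per distinct key directory.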

Example 18 with Reporter

use of org.apache.hadoop.mapred.Reporter in project hadoop by apache.

the class TestCombineFileRecordReader method testInitNextRecordReader.

@SuppressWarnings("unchecked")
@Test
public void testInitNextRecordReader() throws IOException {
    JobConf conf = new JobConf();
    Path[] paths = new Path[3];
    long[] fileLength = new long[3];
    File[] files = new File[3];
    LongWritable key = new LongWritable(1);
    Text value = new Text();
    try {
        for (int i = 0; i < 3; i++) {
            fileLength[i] = i;
            File dir = new File(outDir.toString());
            dir.mkdir();
            files[i] = new File(dir, "testfile" + i);
            FileWriter fileWriter = new FileWriter(files[i]);
            fileWriter.close();
            paths[i] = new Path(outDir + "/testfile" + i);
        }
        CombineFileSplit combineFileSplit = new CombineFileSplit(conf, paths, fileLength);
        Reporter reporter = Mockito.mock(Reporter.class);
        CombineFileRecordReader cfrr = new CombineFileRecordReader(conf, combineFileSplit, reporter, TextRecordReaderWrapper.class);
        verify(reporter).progress();
        Assert.assertFalse(cfrr.next(key, value));
        verify(reporter, times(3)).progress();
    } finally {
        FileUtil.fullyDelete(new File(outDir.toString()));
    }
}
Also used : Path(org.apache.hadoop.fs.Path) FileWriter(java.io.FileWriter) Reporter(org.apache.hadoop.mapred.Reporter) Text(org.apache.hadoop.io.Text) LongWritable(org.apache.hadoop.io.LongWritable) JobConf(org.apache.hadoop.mapred.JobConf) File(java.io.File) Test(org.junit.Test)

Example 19 with Reporter

use of org.apache.hadoop.mapred.Reporter in project hadoop by apache.

the class TestShufflePlugin method testConsumerApi.

/**
 * A test method verifying the availability and accessibility of the API that is
 * needed by sub-classes of ShuffleConsumerPlugin.
 */
@Test
public void testConsumerApi() {
    JobConf jobConf = new JobConf();
    ShuffleConsumerPlugin<K, V> shuffleConsumerPlugin = new TestShuffleConsumerPlugin<K, V>();
    //mock creation
    ReduceTask mockReduceTask = mock(ReduceTask.class);
    TaskUmbilicalProtocol mockUmbilical = mock(TaskUmbilicalProtocol.class);
    Reporter mockReporter = mock(Reporter.class);
    FileSystem mockFileSystem = mock(FileSystem.class);
    Class<? extends org.apache.hadoop.mapred.Reducer> combinerClass = jobConf.getCombinerClass();
    // needed for mock with generic
    @SuppressWarnings("unchecked") CombineOutputCollector<K, V> mockCombineOutputCollector = (CombineOutputCollector<K, V>) mock(CombineOutputCollector.class);
    org.apache.hadoop.mapreduce.TaskAttemptID mockTaskAttemptID = mock(org.apache.hadoop.mapreduce.TaskAttemptID.class);
    LocalDirAllocator mockLocalDirAllocator = mock(LocalDirAllocator.class);
    CompressionCodec mockCompressionCodec = mock(CompressionCodec.class);
    Counter mockCounter = mock(Counter.class);
    TaskStatus mockTaskStatus = mock(TaskStatus.class);
    Progress mockProgress = mock(Progress.class);
    MapOutputFile mockMapOutputFile = mock(MapOutputFile.class);
    Task mockTask = mock(Task.class);
    try {
        String[] dirs = jobConf.getLocalDirs();
        // verify that these APIs are available through super class handler
        ShuffleConsumerPlugin.Context<K, V> context = new ShuffleConsumerPlugin.Context<K, V>(mockTaskAttemptID, jobConf, mockFileSystem, mockUmbilical, mockLocalDirAllocator, mockReporter, mockCompressionCodec, combinerClass, mockCombineOutputCollector, mockCounter, mockCounter, mockCounter, mockCounter, mockCounter, mockCounter, mockTaskStatus, mockProgress, mockProgress, mockTask, mockMapOutputFile, null);
        shuffleConsumerPlugin.init(context);
        shuffleConsumerPlugin.run();
        shuffleConsumerPlugin.close();
    } catch (Exception e) {
        assertTrue("Threw exception:" + e, false);
    }
    // verify that these APIs are available for 3rd party plugins
    mockReduceTask.getTaskID();
    mockReduceTask.getJobID();
    mockReduceTask.getNumMaps();
    mockReduceTask.getPartition();
    mockReporter.progress();
}
Also used : ReduceTask(org.apache.hadoop.mapred.ReduceTask) Task(org.apache.hadoop.mapred.Task) ShuffleConsumerPlugin(org.apache.hadoop.mapred.ShuffleConsumerPlugin) Counter(org.apache.hadoop.mapred.Counters.Counter) FileSystem(org.apache.hadoop.fs.FileSystem) CompressionCodec(org.apache.hadoop.io.compress.CompressionCodec) JobConf(org.apache.hadoop.mapred.JobConf) MapOutputFile(org.apache.hadoop.mapred.MapOutputFile) Progress(org.apache.hadoop.util.Progress) Reporter(org.apache.hadoop.mapred.Reporter) TaskStatus(org.apache.hadoop.mapred.TaskStatus) CombineOutputCollector(org.apache.hadoop.mapred.Task.CombineOutputCollector) TaskUmbilicalProtocol(org.apache.hadoop.mapred.TaskUmbilicalProtocol) ReduceTask(org.apache.hadoop.mapred.ReduceTask) LocalDirAllocator(org.apache.hadoop.fs.LocalDirAllocator) Test(org.junit.Test)
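
The TestShuffleConsumerPlugin instantiated above is not shown on this page. A minimal sketch of what such a sub-class can look like, assuming only the init/run/close contract exercised by the test and a hypothetical no-op body:

import java.io.IOException;

import org.apache.hadoop.mapred.RawKeyValueIterator;
import org.apache.hadoop.mapred.ShuffleConsumerPlugin;

// Hypothetical no-op plugin for illustration; a real plugin would fetch map
// outputs and return an iterator over the merged, sorted key/value pairs.
class NoOpShuffleConsumerPlugin<K, V> implements ShuffleConsumerPlugin<K, V> {

    @Override
    public void init(ShuffleConsumerPlugin.Context<K, V> context) {
        // The Context carries the JobConf, Reporter, codec, combiner class and
        // counters that the test above builds up and passes in.
    }

    @Override
    public RawKeyValueIterator run() throws IOException, InterruptedException {
        // A real implementation would return the merged shuffle output here.
        return null;
    }

    @Override
    public void close() {
        // Nothing to release in this sketch.
    }
}

The test then only needs to confirm that init(), run() and close() are reachable through the plugin interface, which is exactly what testConsumerApi() does.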

Example 20 with Reporter

use of org.apache.hadoop.mapred.Reporter in project hadoop by apache.

the class TestDBInputFormat method testDBInputFormat.

/**
   * test DBInputFormat class. Class should split result for chunks
   * @throws Exception
   */
@Test(timeout = 10000)
public void testDBInputFormat() throws Exception {
    JobConf configuration = new JobConf();
    setupDriver(configuration);
    DBInputFormat<NullDBWritable> format = new DBInputFormat<NullDBWritable>();
    format.setConf(configuration);
    format.setConf(configuration);
    DBInputFormat.DBInputSplit splitter = new DBInputFormat.DBInputSplit(1, 10);
    Reporter reporter = mock(Reporter.class);
    RecordReader<LongWritable, NullDBWritable> reader = format.getRecordReader(splitter, configuration, reporter);
    configuration.setInt(MRJobConfig.NUM_MAPS, 3);
    InputSplit[] lSplits = format.getSplits(configuration, 3);
    assertEquals(5, lSplits[0].getLength());
    assertEquals(3, lSplits.length);
    // test the reader: some simple checks
    assertEquals(LongWritable.class, reader.createKey().getClass());
    assertEquals(0, reader.getPos());
    assertEquals(0, reader.getProgress(), 0.001);
    reader.close();
}
Also used : DBInputSplit(org.apache.hadoop.mapred.lib.db.DBInputFormat.DBInputSplit) NullDBWritable(org.apache.hadoop.mapred.lib.db.DBInputFormat.NullDBWritable) Reporter(org.apache.hadoop.mapred.Reporter) DBInputSplit(org.apache.hadoop.mapred.lib.db.DBInputFormat.DBInputSplit) LongWritable(org.apache.hadoop.io.LongWritable) JobConf(org.apache.hadoop.mapred.JobConf) DBInputSplit(org.apache.hadoop.mapred.lib.db.DBInputFormat.DBInputSplit) InputSplit(org.apache.hadoop.mapred.InputSplit) DriverForTest(org.apache.hadoop.mapreduce.lib.db.DriverForTest) Test(org.junit.Test)
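
When a test only needs to satisfy the old-API signature and never verifies progress() calls, the Mockito mock used in the last two examples can be replaced by the built-in no-op Reporter.NULL. A short sketch under that assumption, reusing the format and configuration set up in the test above (the helper name openReader is hypothetical):

import java.io.IOException;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.RecordReader;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.mapred.lib.db.DBInputFormat;
import org.apache.hadoop.mapred.lib.db.DBInputFormat.NullDBWritable;

// Reporter.NULL is the do-nothing Reporter shipped with the mapred API; it is a
// convenient stand-in when the code under test ignores status and counters.
static RecordReader<LongWritable, NullDBWritable> openReader(
        DBInputFormat<NullDBWritable> format, JobConf configuration) throws IOException {
    DBInputFormat.DBInputSplit splitter = new DBInputFormat.DBInputSplit(1, 10);
    return format.getRecordReader(splitter, configuration, Reporter.NULL);
}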

Aggregations

Reporter (org.apache.hadoop.mapred.Reporter): 23 usages
JobConf (org.apache.hadoop.mapred.JobConf): 13 usages
Test (org.junit.Test): 12 usages
FileSystem (org.apache.hadoop.fs.FileSystem): 6 usages
Result (org.apache.hadoop.hbase.client.Result): 5 usages
ImmutableBytesWritable (org.apache.hadoop.hbase.io.ImmutableBytesWritable): 5 usages
InputSplit (org.apache.hadoop.mapred.InputSplit): 5 usages
Path (org.apache.hadoop.fs.Path): 4 usages
CompressionCodec (org.apache.hadoop.io.compress.CompressionCodec): 4 usages
Counter (org.apache.hadoop.mapred.Counters.Counter): 4 usages
File (java.io.File): 3 usages
IOException (java.io.IOException): 3 usages
Configuration (org.apache.hadoop.conf.Configuration): 3 usages
LocalDirAllocator (org.apache.hadoop.fs.LocalDirAllocator): 3 usages
Cell (org.apache.hadoop.hbase.Cell): 3 usages
KeyValue (org.apache.hadoop.hbase.KeyValue): 3 usages
LongWritable (org.apache.hadoop.io.LongWritable): 3 usages
Text (org.apache.hadoop.io.Text): 3 usages
MapOutputFile (org.apache.hadoop.mapred.MapOutputFile): 3 usages
ShuffleConsumerPlugin (org.apache.hadoop.mapred.ShuffleConsumerPlugin): 3 usages