Example 66 with JobConf

use of org.apache.hadoop.mapred.JobConf in project hadoop by apache.

the class TestJobControl method getCopyJob.

public Job getCopyJob() throws Exception {
    Configuration defaults = new Configuration();
    FileSystem fs = FileSystem.get(defaults);
    Path rootDataDir = new Path(System.getProperty("test.build.data", "."), "TestJobControlData");
    Path indir = new Path(rootDataDir, "indir");
    Path outdir_1 = new Path(rootDataDir, "outdir_1");
    JobControlTestUtils.cleanData(fs, indir);
    JobControlTestUtils.generateData(fs, indir);
    JobControlTestUtils.cleanData(fs, outdir_1);
    ArrayList<Job> dependingJobs = null;
    ArrayList<Path> inPaths_1 = new ArrayList<Path>();
    inPaths_1.add(indir);
    JobConf jobConf_1 = JobControlTestUtils.createCopyJob(inPaths_1, outdir_1);
    Job job_1 = new Job(jobConf_1, dependingJobs);
    return job_1;
}
Also used: Path (org.apache.hadoop.fs.Path), Configuration (org.apache.hadoop.conf.Configuration), FileSystem (org.apache.hadoop.fs.FileSystem), ArrayList (java.util.ArrayList), ControlledJob (org.apache.hadoop.mapreduce.lib.jobcontrol.ControlledJob), JobConf (org.apache.hadoop.mapred.JobConf)

Example 67 with JobConf

use of org.apache.hadoop.mapred.JobConf in project hadoop by apache.

the class TestLocalJobControl method testLocalJobControlDataCopy.

/**
   * This is a main function for testing the JobControl class. It first cleans
   * all the dirs it will use. Then it generates some random text data in
   * TestLocalJobControlData/indir. Then it creates 4 jobs:
   *   Job 1: copy data from indir to outdir_1
   *   Job 2: copy data from indir to outdir_2
   *   Job 3: copy data from outdir_1 and outdir_2 to outdir_3
   *   Job 4: copy data from outdir_3 to outdir_4
   * Jobs 1 and 2 have no dependencies. Job 3 depends on jobs 1 and 2, and
   * job 4 depends on job 3.
   *
   * It then creates a JobControl object, adds the 4 jobs to it, and finally
   * creates a thread to run the JobControl object and monitor/report the
   * job states.
   */
@Test
public void testLocalJobControlDataCopy() throws Exception {
    FileSystem fs = FileSystem.get(createJobConf());
    Path rootDataDir = new Path(System.getProperty("test.build.data", "."), "TestLocalJobControlData");
    Path indir = new Path(rootDataDir, "indir");
    Path outdir_1 = new Path(rootDataDir, "outdir_1");
    Path outdir_2 = new Path(rootDataDir, "outdir_2");
    Path outdir_3 = new Path(rootDataDir, "outdir_3");
    Path outdir_4 = new Path(rootDataDir, "outdir_4");
    JobControlTestUtils.cleanData(fs, indir);
    JobControlTestUtils.generateData(fs, indir);
    JobControlTestUtils.cleanData(fs, outdir_1);
    JobControlTestUtils.cleanData(fs, outdir_2);
    JobControlTestUtils.cleanData(fs, outdir_3);
    JobControlTestUtils.cleanData(fs, outdir_4);
    ArrayList<Job> dependingJobs = null;
    ArrayList<Path> inPaths_1 = new ArrayList<Path>();
    inPaths_1.add(indir);
    JobConf jobConf_1 = JobControlTestUtils.createCopyJob(inPaths_1, outdir_1);
    Job job_1 = new Job(jobConf_1, dependingJobs);
    ArrayList<Path> inPaths_2 = new ArrayList<Path>();
    inPaths_2.add(indir);
    JobConf jobConf_2 = JobControlTestUtils.createCopyJob(inPaths_2, outdir_2);
    Job job_2 = new Job(jobConf_2, dependingJobs);
    ArrayList<Path> inPaths_3 = new ArrayList<Path>();
    inPaths_3.add(outdir_1);
    inPaths_3.add(outdir_2);
    JobConf jobConf_3 = JobControlTestUtils.createCopyJob(inPaths_3, outdir_3);
    dependingJobs = new ArrayList<Job>();
    dependingJobs.add(job_1);
    dependingJobs.add(job_2);
    Job job_3 = new Job(jobConf_3, dependingJobs);
    ArrayList<Path> inPaths_4 = new ArrayList<Path>();
    inPaths_4.add(outdir_3);
    JobConf jobConf_4 = JobControlTestUtils.createCopyJob(inPaths_4, outdir_4);
    dependingJobs = new ArrayList<Job>();
    dependingJobs.add(job_3);
    Job job_4 = new Job(jobConf_4, dependingJobs);
    JobControl theControl = new JobControl("Test");
    theControl.addJob(job_1);
    theControl.addJob(job_2);
    theControl.addJob(job_3);
    theControl.addJob(job_4);
    Thread theController = new Thread(theControl);
    theController.start();
    while (!theControl.allFinished()) {
        LOG.debug("Jobs in waiting state: " + theControl.getWaitingJobs().size());
        LOG.debug("Jobs in ready state: " + theControl.getReadyJobs().size());
        LOG.debug("Jobs in running state: " + theControl.getRunningJobs().size());
        LOG.debug("Jobs in success state: " + theControl.getSuccessfulJobs().size());
        LOG.debug("Jobs in failed state: " + theControl.getFailedJobs().size());
        LOG.debug("\n");
        try {
            Thread.sleep(5000);
        } catch (Exception e) {
            // ignore interruptions while waiting for the jobs to finish
        }
    }
    assertEquals("Some jobs failed", 0, theControl.getFailedJobs().size());
    theControl.stop();
}
Also used: Path (org.apache.hadoop.fs.Path), FileSystem (org.apache.hadoop.fs.FileSystem), ArrayList (java.util.ArrayList), JobConf (org.apache.hadoop.mapred.JobConf), IOException (java.io.IOException), Test (org.junit.Test)
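
The wiring pattern exercised in the test above can be distilled to a few lines. The following is a minimal sketch, not part of the Hadoop sources: it assumes conf1, conf2 and conf3 are already fully configured JobConf instances (for example, built the way JobControlTestUtils.createCopyJob builds them above) and only shows how dependencies are declared and how a JobControl is driven from a thread.

import java.util.ArrayList;

import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.jobcontrol.Job;
import org.apache.hadoop.mapred.jobcontrol.JobControl;

public class JobControlSketch {

    // conf1/conf2/conf3 are placeholders for fully configured JobConf instances.
    public static void runChain(JobConf conf1, JobConf conf2, JobConf conf3)
            throws Exception {
        // Jobs 1 and 2 have no dependencies; job 3 waits for both of them.
        Job job1 = new Job(conf1, null);
        Job job2 = new Job(conf2, null);
        ArrayList<Job> deps = new ArrayList<Job>();
        deps.add(job1);
        deps.add(job2);
        Job job3 = new Job(conf3, deps);

        JobControl control = new JobControl("sketch");
        control.addJob(job1);
        control.addJob(job2);
        control.addJob(job3);

        // JobControl implements Runnable, so it is driven from its own thread.
        Thread runner = new Thread(control);
        runner.start();
        while (!control.allFinished()) {
            Thread.sleep(1000);  // poll until every job has succeeded or failed
        }
        control.stop();
    }
}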

Example 68 with JobConf

use of org.apache.hadoop.mapred.JobConf in project hadoop by apache.

the class StreamJob method setJobConf.

protected void setJobConf() throws IOException {
    if (additionalConfSpec_ != null) {
        LOG.warn("-additionalconfspec option is deprecated, please use -conf instead.");
        config_.addResource(new Path(additionalConfSpec_));
    }
    // general MapRed job properties
    jobConf_ = new JobConf(config_, StreamJob.class);
    // (mapreduce.job.working.dir will be lazily initialized ONCE and depends on FS)
    for (int i = 0; i < inputSpecs_.size(); i++) {
        FileInputFormat.addInputPaths(jobConf_, (String) inputSpecs_.get(i));
    }
    String defaultPackage = this.getClass().getPackage().getName();
    Class c;
    Class fmt = null;
    if (inReaderSpec_ == null && inputFormatSpec_ == null) {
        fmt = TextInputFormat.class;
    } else if (inputFormatSpec_ != null) {
        if (inputFormatSpec_.equals(TextInputFormat.class.getName()) || inputFormatSpec_.equals(TextInputFormat.class.getCanonicalName()) || inputFormatSpec_.equals(TextInputFormat.class.getSimpleName())) {
            fmt = TextInputFormat.class;
        } else if (inputFormatSpec_.equals(KeyValueTextInputFormat.class.getName()) || inputFormatSpec_.equals(KeyValueTextInputFormat.class.getCanonicalName()) || inputFormatSpec_.equals(KeyValueTextInputFormat.class.getSimpleName())) {
            if (inReaderSpec_ == null) {
                fmt = KeyValueTextInputFormat.class;
            }
        } else if (inputFormatSpec_.equals(SequenceFileInputFormat.class.getName()) || inputFormatSpec_.equals(org.apache.hadoop.mapred.SequenceFileInputFormat.class.getCanonicalName()) || inputFormatSpec_.equals(org.apache.hadoop.mapred.SequenceFileInputFormat.class.getSimpleName())) {
            if (inReaderSpec_ == null) {
                fmt = SequenceFileInputFormat.class;
            }
        } else if (inputFormatSpec_.equals(SequenceFileAsTextInputFormat.class.getName()) || inputFormatSpec_.equals(SequenceFileAsTextInputFormat.class.getCanonicalName()) || inputFormatSpec_.equals(SequenceFileAsTextInputFormat.class.getSimpleName())) {
            fmt = SequenceFileAsTextInputFormat.class;
        } else {
            c = StreamUtil.goodClassOrNull(jobConf_, inputFormatSpec_, defaultPackage);
            if (c != null) {
                fmt = c;
            } else {
                fail("-inputformat : class not found : " + inputFormatSpec_);
            }
        }
    }
    if (fmt == null) {
        fmt = StreamInputFormat.class;
    }
    jobConf_.setInputFormat(fmt);
    if (ioSpec_ != null) {
        jobConf_.set("stream.map.input", ioSpec_);
        jobConf_.set("stream.map.output", ioSpec_);
        jobConf_.set("stream.reduce.input", ioSpec_);
        jobConf_.set("stream.reduce.output", ioSpec_);
    }
    Class<? extends IdentifierResolver> idResolverClass = jobConf_.getClass("stream.io.identifier.resolver.class", IdentifierResolver.class, IdentifierResolver.class);
    IdentifierResolver idResolver = ReflectionUtils.newInstance(idResolverClass, jobConf_);
    idResolver.resolve(jobConf_.get("stream.map.input", IdentifierResolver.TEXT_ID));
    jobConf_.setClass("stream.map.input.writer.class", idResolver.getInputWriterClass(), InputWriter.class);
    idResolver.resolve(jobConf_.get("stream.reduce.input", IdentifierResolver.TEXT_ID));
    jobConf_.setClass("stream.reduce.input.writer.class", idResolver.getInputWriterClass(), InputWriter.class);
    jobConf_.set("stream.addenvironment", addTaskEnvironment_);
    boolean isMapperACommand = false;
    if (mapCmd_ != null) {
        c = StreamUtil.goodClassOrNull(jobConf_, mapCmd_, defaultPackage);
        if (c != null) {
            jobConf_.setMapperClass(c);
        } else {
            isMapperACommand = true;
            jobConf_.setMapperClass(PipeMapper.class);
            jobConf_.setMapRunnerClass(PipeMapRunner.class);
            jobConf_.set("stream.map.streamprocessor", URLEncoder.encode(mapCmd_, "UTF-8"));
        }
    }
    if (comCmd_ != null) {
        c = StreamUtil.goodClassOrNull(jobConf_, comCmd_, defaultPackage);
        if (c != null) {
            jobConf_.setCombinerClass(c);
        } else {
            jobConf_.setCombinerClass(PipeCombiner.class);
            jobConf_.set("stream.combine.streamprocessor", URLEncoder.encode(comCmd_, "UTF-8"));
        }
    }
    if (numReduceTasksSpec_ != null) {
        int numReduceTasks = Integer.parseInt(numReduceTasksSpec_);
        jobConf_.setNumReduceTasks(numReduceTasks);
    }
    boolean isReducerACommand = false;
    if (redCmd_ != null) {
        if (redCmd_.equals(REDUCE_NONE)) {
            jobConf_.setNumReduceTasks(0);
        }
        if (jobConf_.getNumReduceTasks() != 0) {
            if (redCmd_.compareToIgnoreCase("aggregate") == 0) {
                jobConf_.setReducerClass(ValueAggregatorReducer.class);
                jobConf_.setCombinerClass(ValueAggregatorCombiner.class);
            } else {
                c = StreamUtil.goodClassOrNull(jobConf_, redCmd_, defaultPackage);
                if (c != null) {
                    jobConf_.setReducerClass(c);
                } else {
                    isReducerACommand = true;
                    jobConf_.setReducerClass(PipeReducer.class);
                    jobConf_.set("stream.reduce.streamprocessor", URLEncoder.encode(redCmd_, "UTF-8"));
                }
            }
        }
    }
    idResolver.resolve(jobConf_.get("stream.map.output", IdentifierResolver.TEXT_ID));
    jobConf_.setClass("stream.map.output.reader.class", idResolver.getOutputReaderClass(), OutputReader.class);
    if (isMapperACommand || jobConf_.get("stream.map.output") != null) {
        // if mapper is a command, then map output key/value classes come from the
        // idResolver
        jobConf_.setMapOutputKeyClass(idResolver.getOutputKeyClass());
        jobConf_.setMapOutputValueClass(idResolver.getOutputValueClass());
        if (jobConf_.getNumReduceTasks() == 0) {
            jobConf_.setOutputKeyClass(idResolver.getOutputKeyClass());
            jobConf_.setOutputValueClass(idResolver.getOutputValueClass());
        }
    }
    idResolver.resolve(jobConf_.get("stream.reduce.output", IdentifierResolver.TEXT_ID));
    jobConf_.setClass("stream.reduce.output.reader.class", idResolver.getOutputReaderClass(), OutputReader.class);
    if (isReducerACommand || jobConf_.get("stream.reduce.output") != null) {
        // if reducer is a command, then output key/value classes come from the
        // idResolver
        jobConf_.setOutputKeyClass(idResolver.getOutputKeyClass());
        jobConf_.setOutputValueClass(idResolver.getOutputValueClass());
    }
    if (inReaderSpec_ != null) {
        String[] args = inReaderSpec_.split(",");
        String readerClass = args[0];
        // this argument can only be a Java class
        c = StreamUtil.goodClassOrNull(jobConf_, readerClass, defaultPackage);
        if (c != null) {
            jobConf_.set("stream.recordreader.class", c.getName());
        } else {
            fail("-inputreader: class not found: " + readerClass);
        }
        for (int i = 1; i < args.length; i++) {
            String[] nv = args[i].split("=", 2);
            String k = "stream.recordreader." + nv[0];
            String v = (nv.length > 1) ? nv[1] : "";
            jobConf_.set(k, v);
        }
    }
    FileOutputFormat.setOutputPath(jobConf_, new Path(output_));
    fmt = null;
    if (outputFormatSpec_ != null) {
        c = StreamUtil.goodClassOrNull(jobConf_, outputFormatSpec_, defaultPackage);
        if (c != null) {
            fmt = c;
        } else {
            fail("-outputformat : class not found : " + outputFormatSpec_);
        }
    }
    if (fmt == null) {
        fmt = TextOutputFormat.class;
    }
    if (lazyOutput_) {
        LazyOutputFormat.setOutputFormatClass(jobConf_, fmt);
    } else {
        jobConf_.setOutputFormat(fmt);
    }
    if (partitionerSpec_ != null) {
        c = StreamUtil.goodClassOrNull(jobConf_, partitionerSpec_, defaultPackage);
        if (c != null) {
            jobConf_.setPartitionerClass(c);
        } else {
            fail("-partitioner : class not found : " + partitionerSpec_);
        }
    }
    if (mapDebugSpec_ != null) {
        jobConf_.setMapDebugScript(mapDebugSpec_);
    }
    if (reduceDebugSpec_ != null) {
        jobConf_.setReduceDebugScript(reduceDebugSpec_);
    }
    // last, allow user to override anything
    // (although typically used with properties we didn't touch)
    jar_ = packageJobJar();
    if (jar_ != null) {
        jobConf_.setJar(jar_);
    }
    if ((cacheArchives != null) || (cacheFiles != null)) {
        getURIs(cacheArchives, cacheFiles);
        boolean b = DistributedCache.checkURIs(fileURIs, archiveURIs);
        if (!b)
            fail(LINK_URI);
    }
    // set the jobconf for the caching parameters
    if (cacheArchives != null)
        DistributedCache.setCacheArchives(archiveURIs, jobConf_);
    if (cacheFiles != null)
        DistributedCache.setCacheFiles(fileURIs, jobConf_);
    if (verbose_) {
        listJobConfProperties();
    }
    msg("submitting to jobconf: " + getJobTrackerHostPort());
}
Also used: Path (org.apache.hadoop.fs.Path), IdentifierResolver (org.apache.hadoop.streaming.io.IdentifierResolver), SequenceFileInputFormat (org.apache.hadoop.mapred.SequenceFileInputFormat), KeyValueTextInputFormat (org.apache.hadoop.mapred.KeyValueTextInputFormat), TextInputFormat (org.apache.hadoop.mapred.TextInputFormat), SequenceFileAsTextInputFormat (org.apache.hadoop.mapred.SequenceFileAsTextInputFormat), JobConf (org.apache.hadoop.mapred.JobConf)
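
setJobConf above handles every streaming option, which is why it is long; the essential JobConf calls it boils down to are few. The following is a minimal sketch under assumed placeholders (IdentityMapper/IdentityReducer stand in for PipeMapper/PipeReducer or a user-supplied class, and the paths are invented), not the streaming code itself.

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.TextInputFormat;
import org.apache.hadoop.mapred.TextOutputFormat;
import org.apache.hadoop.mapred.lib.IdentityMapper;
import org.apache.hadoop.mapred.lib.IdentityReducer;

public class MinimalJobConfSketch {

    public static JobConf build() {
        JobConf conf = new JobConf(MinimalJobConfSketch.class);
        conf.setJobName("minimal-jobconf-sketch");

        // Input/output formats, analogous to the fmt selection above.
        conf.setInputFormat(TextInputFormat.class);
        conf.setOutputFormat(TextOutputFormat.class);

        // Mapper/reducer classes; streaming would plug in PipeMapper/PipeReducer here.
        conf.setMapperClass(IdentityMapper.class);
        conf.setReducerClass(IdentityReducer.class);
        conf.setNumReduceTasks(1);

        // Placeholder paths; streaming takes these from -input/-output.
        FileInputFormat.addInputPath(conf, new Path("in"));
        FileOutputFormat.setOutputPath(conf, new Path("out"));
        return conf;
    }
}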

Example 69 with JobConf

use of org.apache.hadoop.mapred.JobConf in project hadoop by apache.

the class DumpTypedBytes method dumpTypedBytes.

/**
   * Dumps the given list of files to standard output as typed bytes.
   */
@SuppressWarnings("unchecked")
private int dumpTypedBytes(List<FileStatus> files) throws IOException {
    JobConf job = new JobConf(getConf());
    DataOutputStream dout = new DataOutputStream(System.out);
    AutoInputFormat autoInputFormat = new AutoInputFormat();
    for (FileStatus fileStatus : files) {
        FileSplit split = new FileSplit(fileStatus.getPath(), 0, fileStatus.getLen() * fileStatus.getBlockSize(), (String[]) null);
        RecordReader recReader = null;
        try {
            recReader = autoInputFormat.getRecordReader(split, job, Reporter.NULL);
            Object key = recReader.createKey();
            Object value = recReader.createValue();
            while (recReader.next(key, value)) {
                if (key instanceof Writable) {
                    TypedBytesWritableOutput.get(dout).write((Writable) key);
                } else {
                    TypedBytesOutput.get(dout).write(key);
                }
                if (value instanceof Writable) {
                    TypedBytesWritableOutput.get(dout).write((Writable) value);
                } else {
                    TypedBytesOutput.get(dout).write(value);
                }
            }
        } finally {
            if (recReader != null) {
                recReader.close();
            }
        }
    }
    dout.flush();
    return 0;
}
Also used: FileStatus (org.apache.hadoop.fs.FileStatus), DataOutputStream (java.io.DataOutputStream), RecordReader (org.apache.hadoop.mapred.RecordReader), Writable (org.apache.hadoop.io.Writable), FileSplit (org.apache.hadoop.mapred.FileSplit), JobConf (org.apache.hadoop.mapred.JobConf)
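
The method above expects the caller to supply the List<FileStatus> to dump. Below is a minimal sketch of one way to build that list, assuming the input files are matched by a glob against the default FileSystem; the class name and glob handling are illustrative, not part of DumpTypedBytes.

import java.util.ArrayList;
import java.util.List;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class TypedBytesInputLister {

    public static List<FileStatus> listInputs(Configuration conf, String glob)
            throws Exception {
        FileSystem fs = FileSystem.get(conf);
        List<FileStatus> files = new ArrayList<FileStatus>();
        // globStatus returns null when nothing matches, so guard against that.
        FileStatus[] matches = fs.globStatus(new Path(glob));
        if (matches != null) {
            for (FileStatus status : matches) {
                if (status.isFile()) {
                    files.add(status);
                }
            }
        }
        return files;
    }
}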

Example 70 with JobConf

use of org.apache.hadoop.mapred.JobConf in project hadoop by apache.

the class TestAutoInputFormat method testFormat.

@SuppressWarnings({ "unchecked", "deprecation" })
@Test
public void testFormat() throws IOException {
    JobConf job = new JobConf(conf);
    FileSystem fs = FileSystem.getLocal(conf);
    Path dir = new Path(System.getProperty("test.build.data", ".") + "/mapred");
    Path txtFile = new Path(dir, "auto.txt");
    Path seqFile = new Path(dir, "auto.seq");
    fs.delete(dir, true);
    FileInputFormat.setInputPaths(job, dir);
    Writer txtWriter = new OutputStreamWriter(fs.create(txtFile));
    try {
        for (int i = 0; i < LINES_COUNT; i++) {
            txtWriter.write("" + (10 * i));
            txtWriter.write("\n");
        }
    } finally {
        txtWriter.close();
    }
    SequenceFile.Writer seqWriter = SequenceFile.createWriter(fs, conf, seqFile, IntWritable.class, LongWritable.class);
    try {
        for (int i = 0; i < RECORDS_COUNT; i++) {
            IntWritable key = new IntWritable(11 * i);
            LongWritable value = new LongWritable(12 * i);
            seqWriter.append(key, value);
        }
    } finally {
        seqWriter.close();
    }
    AutoInputFormat format = new AutoInputFormat();
    InputSplit[] splits = format.getSplits(job, SPLITS_COUNT);
    for (InputSplit split : splits) {
        RecordReader reader = format.getRecordReader(split, job, Reporter.NULL);
        Object key = reader.createKey();
        Object value = reader.createValue();
        try {
            while (reader.next(key, value)) {
                if (key instanceof LongWritable) {
                    assertEquals("Wrong value class.", Text.class, value.getClass());
                    assertTrue("Invalid value", Integer.parseInt(((Text) value).toString()) % 10 == 0);
                } else {
                    assertEquals("Wrong key class.", IntWritable.class, key.getClass());
                    assertEquals("Wrong value class.", LongWritable.class, value.getClass());
                    assertTrue("Invalid key.", ((IntWritable) key).get() % 11 == 0);
                    assertTrue("Invalid value.", ((LongWritable) value).get() % 12 == 0);
                }
            }
        } finally {
            reader.close();
        }
    }
}
Also used: Path (org.apache.hadoop.fs.Path), AutoInputFormat (org.apache.hadoop.streaming.AutoInputFormat), RecordReader (org.apache.hadoop.mapred.RecordReader), Text (org.apache.hadoop.io.Text), SequenceFile (org.apache.hadoop.io.SequenceFile), FileSystem (org.apache.hadoop.fs.FileSystem), OutputStreamWriter (java.io.OutputStreamWriter), LongWritable (org.apache.hadoop.io.LongWritable), JobConf (org.apache.hadoop.mapred.JobConf), InputSplit (org.apache.hadoop.mapred.InputSplit), Writer (java.io.Writer), IntWritable (org.apache.hadoop.io.IntWritable), Test (org.junit.Test)

Aggregations

JobConf (org.apache.hadoop.mapred.JobConf): 1037
Path (org.apache.hadoop.fs.Path): 510
Test (org.junit.Test): 317
FileSystem (org.apache.hadoop.fs.FileSystem): 264
IOException (java.io.IOException): 204
Configuration (org.apache.hadoop.conf.Configuration): 163
InputSplit (org.apache.hadoop.mapred.InputSplit): 110
ArrayList (java.util.ArrayList): 89
Text (org.apache.hadoop.io.Text): 82
File (java.io.File): 81
RunningJob (org.apache.hadoop.mapred.RunningJob): 67
Properties (java.util.Properties): 58
List (java.util.List): 49
HashMap (java.util.HashMap): 47
DMLRuntimeException (org.apache.sysml.runtime.DMLRuntimeException): 47
SequenceFile (org.apache.hadoop.io.SequenceFile): 45
TextInputFormat (org.apache.hadoop.mapred.TextInputFormat): 44
Map (java.util.Map): 42
Job (org.apache.hadoop.mapreduce.Job): 42
LongWritable (org.apache.hadoop.io.LongWritable): 41