Use of org.apache.hadoop.mapred.JobConf in project hadoop by apache.
From the class TestJobControl, method getCopyJob:
public Job getCopyJob() throws Exception {
  Configuration defaults = new Configuration();
  FileSystem fs = FileSystem.get(defaults);
  Path rootDataDir = new Path(System.getProperty("test.build.data", "."),
      "TestJobControlData");
  Path indir = new Path(rootDataDir, "indir");
  Path outdir_1 = new Path(rootDataDir, "outdir_1");
  JobControlTestUtils.cleanData(fs, indir);
  JobControlTestUtils.generateData(fs, indir);
  JobControlTestUtils.cleanData(fs, outdir_1);
  ArrayList<Job> dependingJobs = null;
  ArrayList<Path> inPaths_1 = new ArrayList<Path>();
  inPaths_1.add(indir);
  JobConf jobConf_1 = JobControlTestUtils.createCopyJob(inPaths_1, outdir_1);
  Job job_1 = new Job(jobConf_1, dependingJobs);
  return job_1;
}
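The Job returned here has a null dependency list, so it becomes runnable as soon as it is added to a JobControl. A minimal sketch of driving it, mirroring the pattern used by testLocalJobControlDataCopy below; the variable names are illustrative, and the code assumes a context that declares throws Exception (as getCopyJob itself does):

// Sketch: drive the single copy job with a JobControl, as the full test below does.
JobControl control = new JobControl("CopySingle"); // group name is arbitrary
control.addJob(getCopyJob());                      // job has no dependencies
Thread runner = new Thread(control);
runner.start();
while (!control.allFinished()) {
  try {
    Thread.sleep(1000);                            // poll until the job completes
  } catch (InterruptedException ignored) {
  }
}
control.stop();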
Use of org.apache.hadoop.mapred.JobConf in project hadoop by apache.
From the class TestLocalJobControl, method testLocalJobControlDataCopy:
/**
 * This is the main function for testing the JobControl class. It first cleans
 * all the dirs it will use. Then it generates some random text data in
 * TestLocalJobControlData/indir. Then it creates four jobs:
 *   Job 1: copy data from indir to outdir_1
 *   Job 2: copy data from indir to outdir_2
 *   Job 3: copy data from outdir_1 and outdir_2 to outdir_3
 *   Job 4: copy data from outdir_3 to outdir_4
 * Jobs 1 and 2 have no dependencies. Job 3 depends on jobs 1 and 2. Job 4
 * depends on job 3.
 *
 * Then it creates a JobControl object and adds the four jobs to it. Finally,
 * it creates a thread to run the JobControl object and monitors/reports the
 * job states.
 */
@Test
public void testLocalJobControlDataCopy() throws Exception {
  FileSystem fs = FileSystem.get(createJobConf());
  Path rootDataDir = new Path(System.getProperty("test.build.data", "."),
      "TestLocalJobControlData");
  Path indir = new Path(rootDataDir, "indir");
  Path outdir_1 = new Path(rootDataDir, "outdir_1");
  Path outdir_2 = new Path(rootDataDir, "outdir_2");
  Path outdir_3 = new Path(rootDataDir, "outdir_3");
  Path outdir_4 = new Path(rootDataDir, "outdir_4");
  JobControlTestUtils.cleanData(fs, indir);
  JobControlTestUtils.generateData(fs, indir);
  JobControlTestUtils.cleanData(fs, outdir_1);
  JobControlTestUtils.cleanData(fs, outdir_2);
  JobControlTestUtils.cleanData(fs, outdir_3);
  JobControlTestUtils.cleanData(fs, outdir_4);
  ArrayList<Job> dependingJobs = null;
  ArrayList<Path> inPaths_1 = new ArrayList<Path>();
  inPaths_1.add(indir);
  JobConf jobConf_1 = JobControlTestUtils.createCopyJob(inPaths_1, outdir_1);
  Job job_1 = new Job(jobConf_1, dependingJobs);
  ArrayList<Path> inPaths_2 = new ArrayList<Path>();
  inPaths_2.add(indir);
  JobConf jobConf_2 = JobControlTestUtils.createCopyJob(inPaths_2, outdir_2);
  Job job_2 = new Job(jobConf_2, dependingJobs);
  ArrayList<Path> inPaths_3 = new ArrayList<Path>();
  inPaths_3.add(outdir_1);
  inPaths_3.add(outdir_2);
  JobConf jobConf_3 = JobControlTestUtils.createCopyJob(inPaths_3, outdir_3);
  dependingJobs = new ArrayList<Job>();
  dependingJobs.add(job_1);
  dependingJobs.add(job_2);
  Job job_3 = new Job(jobConf_3, dependingJobs);
  ArrayList<Path> inPaths_4 = new ArrayList<Path>();
  inPaths_4.add(outdir_3);
  JobConf jobConf_4 = JobControlTestUtils.createCopyJob(inPaths_4, outdir_4);
  dependingJobs = new ArrayList<Job>();
  dependingJobs.add(job_3);
  Job job_4 = new Job(jobConf_4, dependingJobs);
  JobControl theControl = new JobControl("Test");
  theControl.addJob(job_1);
  theControl.addJob(job_2);
  theControl.addJob(job_3);
  theControl.addJob(job_4);
  Thread theController = new Thread(theControl);
  theController.start();
  while (!theControl.allFinished()) {
    LOG.debug("Jobs in waiting state: " + theControl.getWaitingJobs().size());
    LOG.debug("Jobs in ready state: " + theControl.getReadyJobs().size());
    LOG.debug("Jobs in running state: " + theControl.getRunningJobs().size());
    LOG.debug("Jobs in success state: " + theControl.getSuccessfulJobs().size());
    LOG.debug("Jobs in failed state: " + theControl.getFailedJobs().size());
    LOG.debug("\n");
    try {
      Thread.sleep(5000);
    } catch (Exception e) {
    }
  }
  assertEquals("Some jobs failed", 0, theControl.getFailedJobs().size());
  theControl.stop();
}
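The snippet refers to a LOG field and a createJobConf() helper that are not shown; both belong to the surrounding test class rather than to this method. A plausible shape for that omitted context, stated only as an assumption:

// Assumed members of the enclosing test class (not part of the snippet above).
// LOG would be an ordinary per-class logger; createJobConf() would come from
// Hadoop's shared test base class and return a JobConf wired for the local runner.
private static final Logger LOG =
    LoggerFactory.getLogger(TestLocalJobControl.class);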
Use of org.apache.hadoop.mapred.JobConf in project hadoop by apache.
From the class StreamJob, method setJobConf:
protected void setJobConf() throws IOException {
  if (additionalConfSpec_ != null) {
    LOG.warn("-additionalconfspec option is deprecated, please use -conf instead.");
    config_.addResource(new Path(additionalConfSpec_));
  }
  // general MapRed job properties
  jobConf_ = new JobConf(config_, StreamJob.class);
  // (mapreduce.job.working.dir will be lazily initialized ONCE and depends on FS)
  for (int i = 0; i < inputSpecs_.size(); i++) {
    FileInputFormat.addInputPaths(jobConf_, (String) inputSpecs_.get(i));
  }
  String defaultPackage = this.getClass().getPackage().getName();
  Class c;
  Class fmt = null;
  if (inReaderSpec_ == null && inputFormatSpec_ == null) {
    fmt = TextInputFormat.class;
  } else if (inputFormatSpec_ != null) {
    if (inputFormatSpec_.equals(TextInputFormat.class.getName())
        || inputFormatSpec_.equals(TextInputFormat.class.getCanonicalName())
        || inputFormatSpec_.equals(TextInputFormat.class.getSimpleName())) {
      fmt = TextInputFormat.class;
    } else if (inputFormatSpec_.equals(KeyValueTextInputFormat.class.getName())
        || inputFormatSpec_.equals(KeyValueTextInputFormat.class.getCanonicalName())
        || inputFormatSpec_.equals(KeyValueTextInputFormat.class.getSimpleName())) {
      if (inReaderSpec_ == null) {
        fmt = KeyValueTextInputFormat.class;
      }
    } else if (inputFormatSpec_.equals(SequenceFileInputFormat.class.getName())
        || inputFormatSpec_.equals(
            org.apache.hadoop.mapred.SequenceFileInputFormat.class.getCanonicalName())
        || inputFormatSpec_.equals(
            org.apache.hadoop.mapred.SequenceFileInputFormat.class.getSimpleName())) {
      if (inReaderSpec_ == null) {
        fmt = SequenceFileInputFormat.class;
      }
    } else if (inputFormatSpec_.equals(SequenceFileAsTextInputFormat.class.getName())
        || inputFormatSpec_.equals(SequenceFileAsTextInputFormat.class.getCanonicalName())
        || inputFormatSpec_.equals(SequenceFileAsTextInputFormat.class.getSimpleName())) {
      fmt = SequenceFileAsTextInputFormat.class;
    } else {
      c = StreamUtil.goodClassOrNull(jobConf_, inputFormatSpec_, defaultPackage);
      if (c != null) {
        fmt = c;
      } else {
        fail("-inputformat : class not found : " + inputFormatSpec_);
      }
    }
  }
  if (fmt == null) {
    fmt = StreamInputFormat.class;
  }
  jobConf_.setInputFormat(fmt);
  if (ioSpec_ != null) {
    jobConf_.set("stream.map.input", ioSpec_);
    jobConf_.set("stream.map.output", ioSpec_);
    jobConf_.set("stream.reduce.input", ioSpec_);
    jobConf_.set("stream.reduce.output", ioSpec_);
  }
  Class<? extends IdentifierResolver> idResolverClass =
      jobConf_.getClass("stream.io.identifier.resolver.class",
          IdentifierResolver.class, IdentifierResolver.class);
  IdentifierResolver idResolver = ReflectionUtils.newInstance(idResolverClass, jobConf_);
  idResolver.resolve(jobConf_.get("stream.map.input", IdentifierResolver.TEXT_ID));
  jobConf_.setClass("stream.map.input.writer.class",
      idResolver.getInputWriterClass(), InputWriter.class);
  idResolver.resolve(jobConf_.get("stream.reduce.input", IdentifierResolver.TEXT_ID));
  jobConf_.setClass("stream.reduce.input.writer.class",
      idResolver.getInputWriterClass(), InputWriter.class);
  jobConf_.set("stream.addenvironment", addTaskEnvironment_);
  boolean isMapperACommand = false;
  if (mapCmd_ != null) {
    c = StreamUtil.goodClassOrNull(jobConf_, mapCmd_, defaultPackage);
    if (c != null) {
      jobConf_.setMapperClass(c);
    } else {
      isMapperACommand = true;
      jobConf_.setMapperClass(PipeMapper.class);
      jobConf_.setMapRunnerClass(PipeMapRunner.class);
      jobConf_.set("stream.map.streamprocessor", URLEncoder.encode(mapCmd_, "UTF-8"));
    }
  }
  if (comCmd_ != null) {
    c = StreamUtil.goodClassOrNull(jobConf_, comCmd_, defaultPackage);
    if (c != null) {
      jobConf_.setCombinerClass(c);
    } else {
      jobConf_.setCombinerClass(PipeCombiner.class);
      jobConf_.set("stream.combine.streamprocessor", URLEncoder.encode(comCmd_, "UTF-8"));
    }
  }
  if (numReduceTasksSpec_ != null) {
    int numReduceTasks = Integer.parseInt(numReduceTasksSpec_);
    jobConf_.setNumReduceTasks(numReduceTasks);
  }
  boolean isReducerACommand = false;
  if (redCmd_ != null) {
    if (redCmd_.equals(REDUCE_NONE)) {
      jobConf_.setNumReduceTasks(0);
    }
    if (jobConf_.getNumReduceTasks() != 0) {
      if (redCmd_.compareToIgnoreCase("aggregate") == 0) {
        jobConf_.setReducerClass(ValueAggregatorReducer.class);
        jobConf_.setCombinerClass(ValueAggregatorCombiner.class);
      } else {
        c = StreamUtil.goodClassOrNull(jobConf_, redCmd_, defaultPackage);
        if (c != null) {
          jobConf_.setReducerClass(c);
        } else {
          isReducerACommand = true;
          jobConf_.setReducerClass(PipeReducer.class);
          jobConf_.set("stream.reduce.streamprocessor", URLEncoder.encode(redCmd_, "UTF-8"));
        }
      }
    }
  }
  idResolver.resolve(jobConf_.get("stream.map.output", IdentifierResolver.TEXT_ID));
  jobConf_.setClass("stream.map.output.reader.class",
      idResolver.getOutputReaderClass(), OutputReader.class);
  if (isMapperACommand || jobConf_.get("stream.map.output") != null) {
    // if mapper is a command, then map output key/value classes come from the
    // idResolver
    jobConf_.setMapOutputKeyClass(idResolver.getOutputKeyClass());
    jobConf_.setMapOutputValueClass(idResolver.getOutputValueClass());
    if (jobConf_.getNumReduceTasks() == 0) {
      jobConf_.setOutputKeyClass(idResolver.getOutputKeyClass());
      jobConf_.setOutputValueClass(idResolver.getOutputValueClass());
    }
  }
  idResolver.resolve(jobConf_.get("stream.reduce.output", IdentifierResolver.TEXT_ID));
  jobConf_.setClass("stream.reduce.output.reader.class",
      idResolver.getOutputReaderClass(), OutputReader.class);
  if (isReducerACommand || jobConf_.get("stream.reduce.output") != null) {
    // if reducer is a command, then output key/value classes come from the
    // idResolver
    jobConf_.setOutputKeyClass(idResolver.getOutputKeyClass());
    jobConf_.setOutputValueClass(idResolver.getOutputValueClass());
  }
  if (inReaderSpec_ != null) {
    String[] args = inReaderSpec_.split(",");
    String readerClass = args[0];
    // this argument can only be a Java class
    c = StreamUtil.goodClassOrNull(jobConf_, readerClass, defaultPackage);
    if (c != null) {
      jobConf_.set("stream.recordreader.class", c.getName());
    } else {
      fail("-inputreader: class not found: " + readerClass);
    }
    for (int i = 1; i < args.length; i++) {
      String[] nv = args[i].split("=", 2);
      String k = "stream.recordreader." + nv[0];
      String v = (nv.length > 1) ? nv[1] : "";
      jobConf_.set(k, v);
    }
  }
  FileOutputFormat.setOutputPath(jobConf_, new Path(output_));
  fmt = null;
  if (outputFormatSpec_ != null) {
    c = StreamUtil.goodClassOrNull(jobConf_, outputFormatSpec_, defaultPackage);
    if (c != null) {
      fmt = c;
    } else {
      fail("-outputformat : class not found : " + outputFormatSpec_);
    }
  }
  if (fmt == null) {
    fmt = TextOutputFormat.class;
  }
  if (lazyOutput_) {
    LazyOutputFormat.setOutputFormatClass(jobConf_, fmt);
  } else {
    jobConf_.setOutputFormat(fmt);
  }
  if (partitionerSpec_ != null) {
    c = StreamUtil.goodClassOrNull(jobConf_, partitionerSpec_, defaultPackage);
    if (c != null) {
      jobConf_.setPartitionerClass(c);
    } else {
      fail("-partitioner : class not found : " + partitionerSpec_);
    }
  }
  if (mapDebugSpec_ != null) {
    jobConf_.setMapDebugScript(mapDebugSpec_);
  }
  if (reduceDebugSpec_ != null) {
    jobConf_.setReduceDebugScript(reduceDebugSpec_);
  }
  // last, allow user to override anything
  // (although typically used with properties we didn't touch)
  jar_ = packageJobJar();
  if (jar_ != null) {
    jobConf_.setJar(jar_);
  }
  if ((cacheArchives != null) || (cacheFiles != null)) {
    getURIs(cacheArchives, cacheFiles);
    boolean b = DistributedCache.checkURIs(fileURIs, archiveURIs);
    if (!b)
      fail(LINK_URI);
  }
  // set the jobconf for the caching parameters
  if (cacheArchives != null)
    DistributedCache.setCacheArchives(archiveURIs, jobConf_);
  if (cacheFiles != null)
    DistributedCache.setCacheFiles(fileURIs, jobConf_);
  if (verbose_) {
    listJobConfProperties();
  }
  msg("submitting to jobconf: " + getJobTrackerHostPort());
}
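For a command (non-Java) mapper, the essential effect of setJobConf is a handful of JobConf calls and stream.* properties. A hedged, hand-written sketch of that subset, using only calls that appear in the method above; the command and paths are placeholders, and this is not a substitute for the full option handling:

// Rough equivalent of setJobConf() for a command mapper with no reducers,
// per the code above (runs in a context that declares throws IOException).
JobConf streamConf = new JobConf(new Configuration(), StreamJob.class);
FileInputFormat.addInputPaths(streamConf, "/tmp/stream-in");         // placeholder input
FileOutputFormat.setOutputPath(streamConf, new Path("/tmp/stream-out")); // placeholder output
streamConf.setInputFormat(TextInputFormat.class);
streamConf.setMapperClass(PipeMapper.class);
streamConf.setMapRunnerClass(PipeMapRunner.class);
streamConf.set("stream.map.streamprocessor", URLEncoder.encode("/bin/cat", "UTF-8"));
streamConf.setNumReduceTasks(0);
streamConf.setOutputFormat(TextOutputFormat.class);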
Use of org.apache.hadoop.mapred.JobConf in project hadoop by apache.
From the class DumpTypedBytes, method dumpTypedBytes:
/**
 * Dump given list of files to standard output as typed bytes.
 */
@SuppressWarnings("unchecked")
private int dumpTypedBytes(List<FileStatus> files) throws IOException {
  JobConf job = new JobConf(getConf());
  DataOutputStream dout = new DataOutputStream(System.out);
  AutoInputFormat autoInputFormat = new AutoInputFormat();
  for (FileStatus fileStatus : files) {
    FileSplit split = new FileSplit(fileStatus.getPath(), 0,
        fileStatus.getLen() * fileStatus.getBlockSize(), (String[]) null);
    RecordReader recReader = null;
    try {
      recReader = autoInputFormat.getRecordReader(split, job, Reporter.NULL);
      Object key = recReader.createKey();
      Object value = recReader.createValue();
      while (recReader.next(key, value)) {
        if (key instanceof Writable) {
          TypedBytesWritableOutput.get(dout).write((Writable) key);
        } else {
          TypedBytesOutput.get(dout).write(key);
        }
        if (value instanceof Writable) {
          TypedBytesWritableOutput.get(dout).write((Writable) value);
        } else {
          TypedBytesOutput.get(dout).write(value);
        }
      }
    } finally {
      if (recReader != null) {
        recReader.close();
      }
    }
  }
  dout.flush();
  return 0;
}
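The typed-bytes stream written here can be consumed with the matching reader from the same typedbytes package. A small consumer sketch, assuming the usual TypedBytesInput API (a static get(DataInput) factory and a read() method that returns null at end of stream); treat the exact behavior at EOF as an assumption:

// Sketch: read a typed-bytes stream (e.g. the output of dumpTypedBytes) from stdin.
// Assumes org.apache.hadoop.typedbytes.TypedBytesInput#read() returns null at EOF.
DataInputStream din = new DataInputStream(System.in);
TypedBytesInput tbIn = TypedBytesInput.get(din);
Object obj;
while ((obj = tbIn.read()) != null) {
  System.err.println(obj.getClass().getSimpleName() + ": " + obj);
}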
Use of org.apache.hadoop.mapred.JobConf in project hadoop by apache.
From the class TestAutoInputFormat, method testFormat:
@SuppressWarnings({ "unchecked", "deprecation" })
@Test
public void testFormat() throws IOException {
  JobConf job = new JobConf(conf);
  FileSystem fs = FileSystem.getLocal(conf);
  Path dir = new Path(System.getProperty("test.build.data", ".") + "/mapred");
  Path txtFile = new Path(dir, "auto.txt");
  Path seqFile = new Path(dir, "auto.seq");
  fs.delete(dir, true);
  FileInputFormat.setInputPaths(job, dir);
  Writer txtWriter = new OutputStreamWriter(fs.create(txtFile));
  try {
    for (int i = 0; i < LINES_COUNT; i++) {
      txtWriter.write("" + (10 * i));
      txtWriter.write("\n");
    }
  } finally {
    txtWriter.close();
  }
  SequenceFile.Writer seqWriter = SequenceFile.createWriter(fs, conf, seqFile,
      IntWritable.class, LongWritable.class);
  try {
    for (int i = 0; i < RECORDS_COUNT; i++) {
      IntWritable key = new IntWritable(11 * i);
      LongWritable value = new LongWritable(12 * i);
      seqWriter.append(key, value);
    }
  } finally {
    seqWriter.close();
  }
  AutoInputFormat format = new AutoInputFormat();
  InputSplit[] splits = format.getSplits(job, SPLITS_COUNT);
  for (InputSplit split : splits) {
    RecordReader reader = format.getRecordReader(split, job, Reporter.NULL);
    Object key = reader.createKey();
    Object value = reader.createValue();
    try {
      while (reader.next(key, value)) {
        if (key instanceof LongWritable) {
          assertEquals("Wrong value class.", Text.class, value.getClass());
          assertTrue("Invalid value",
              Integer.parseInt(((Text) value).toString()) % 10 == 0);
        } else {
          assertEquals("Wrong key class.", IntWritable.class, key.getClass());
          assertEquals("Wrong value class.", LongWritable.class, value.getClass());
          assertTrue("Invalid key.", ((IntWritable) key).get() % 11 == 0);
          assertTrue("Invalid value.", ((LongWritable) value).get() % 12 == 0);
        }
      }
    } finally {
      reader.close();
    }
  }
}
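The test uses conf, LINES_COUNT, RECORDS_COUNT, and SPLITS_COUNT without showing their declarations; they belong to the surrounding class. Hypothetical declarations with illustrative values, included only to make the snippet self-contained:

// Hypothetical class members referenced by testFormat() (values are illustrative).
private static Configuration conf = new Configuration();
private static final int LINES_COUNT = 3;
private static final int RECORDS_COUNT = 3;
private static final int SPLITS_COUNT = 2;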