Example 6 with RunningJob

Use of org.apache.hadoop.mapred.RunningJob in project hadoop by apache.

The class TestMultipleOutputs, method _testMOWithJavaSerialization.

protected void _testMOWithJavaSerialization(boolean withCounters) throws Exception {
    Path inDir = getDir(IN_DIR);
    Path outDir = getDir(OUT_DIR);
    JobConf conf = createJobConf();
    FileSystem fs = FileSystem.get(conf);
    // Write part-0, then clear both directories; the deletes remove any stale
    // data, including the part-0 file just written, so part-1 created below is
    // the only input the job sees (hence the single map task asserted later).
    DataOutputStream file = fs.create(new Path(inDir, "part-0"));
    file.writeBytes("a\nb\n\nc\nd\ne");
    file.close();
    fs.delete(inDir, true);
    fs.delete(outDir, true);
    file = fs.create(new Path(inDir, "part-1"));
    file.writeBytes("a\nb\n\nc\nd\ne");
    file.close();
    conf.setJobName("mo");
    conf.set("io.serializations", "org.apache.hadoop.io.serializer.JavaSerialization," + "org.apache.hadoop.io.serializer.WritableSerialization");
    conf.setInputFormat(TextInputFormat.class);
    conf.setMapOutputKeyClass(Long.class);
    conf.setMapOutputValueClass(String.class);
    conf.setOutputKeyComparatorClass(JavaSerializationComparator.class);
    conf.setOutputKeyClass(Long.class);
    conf.setOutputValueClass(String.class);
    conf.setOutputFormat(TextOutputFormat.class);
    MultipleOutputs.addNamedOutput(conf, "text", TextOutputFormat.class, Long.class, String.class);
    MultipleOutputs.setCountersEnabled(conf, withCounters);
    conf.setMapperClass(MOJavaSerDeMap.class);
    conf.setReducerClass(MOJavaSerDeReduce.class);
    FileInputFormat.setInputPaths(conf, inDir);
    FileOutputFormat.setOutputPath(conf, outDir);
    JobClient jc = new JobClient(conf);
    RunningJob job = jc.submitJob(conf);
    // submitJob() returns immediately; poll until the job finishes
    while (!job.isComplete()) {
        Thread.sleep(100);
    }
    // assert number of named output part files
    int namedOutputCount = 0;
    FileStatus[] statuses = fs.listStatus(outDir);
    for (FileStatus status : statuses) {
        if (status.getPath().getName().equals("text-m-00000") || status.getPath().getName().equals("text-r-00000")) {
            namedOutputCount++;
        }
    }
    assertEquals(2, namedOutputCount);
    // assert TextOutputFormat files correctness
    BufferedReader reader = new BufferedReader(new InputStreamReader(fs.open(new Path(FileOutputFormat.getOutputPath(conf), "text-r-00000"))));
    int count = 0;
    String line = reader.readLine();
    while (line != null) {
        assertTrue(line.endsWith("text"));
        line = reader.readLine();
        count++;
    }
    reader.close();
    assertFalse(count == 0);
    // when counters are enabled, MultipleOutputs keeps one counter per named
    // output, counting the records written to it
    Counters.Group counters = job.getCounters().getGroup(MultipleOutputs.class.getName());
    if (!withCounters) {
        assertEquals(0, counters.size());
    } else {
        assertEquals(1, counters.size());
        assertEquals(2, counters.getCounter("text"));
    }
}
Also used : Path(org.apache.hadoop.fs.Path) FileStatus(org.apache.hadoop.fs.FileStatus) InputStreamReader(java.io.InputStreamReader) DataOutputStream(java.io.DataOutputStream) JobClient(org.apache.hadoop.mapred.JobClient) FileSystem(org.apache.hadoop.fs.FileSystem) RunningJob(org.apache.hadoop.mapred.RunningJob) BufferedReader(java.io.BufferedReader) Counters(org.apache.hadoop.mapred.Counters) JobConf(org.apache.hadoop.mapred.JobConf)
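
The MOJavaSerDeMap and MOJavaSerDeReduce classes referenced by the job setup are not shown on this page. As a rough sketch of the mechanics only (the actual bodies in the Hadoop source differ; the shapes below are reconstructed from the old mapred API and the test's assertions), a mapper/reducer pair writing to the "text" named output might look like this:

import java.io.IOException;
import java.util.Iterator;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reducer;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.mapred.lib.MultipleOutputs;

@SuppressWarnings("unchecked")
class MOJavaSerDeMap implements Mapper<LongWritable, Text, Long, String> {

    private MultipleOutputs mos;

    public void configure(JobConf conf) {
        mos = new MultipleOutputs(conf);
    }

    public void map(LongWritable key, Text value, OutputCollector<Long, String> output, Reporter reporter) throws IOException {
        // regular records go to the job's default output; the named output
        // produces the map-side text-m-NNNNN part files
        output.collect(key.get(), value.toString());
        mos.getCollector("text", reporter).collect(key.get(), value + "text");
    }

    public void close() throws IOException {
        // flushes and closes all named-output record writers
        mos.close();
    }
}

@SuppressWarnings("unchecked")
class MOJavaSerDeReduce implements Reducer<Long, String, Long, String> {

    private MultipleOutputs mos;

    public void configure(JobConf conf) {
        mos = new MultipleOutputs(conf);
    }

    public void reduce(Long key, Iterator<String> values, OutputCollector<Long, String> output, Reporter reporter) throws IOException {
        while (values.hasNext()) {
            // every line written here ends with "text", matching the
            // line.endsWith("text") assertion in the test
            mos.getCollector("text", reporter).collect(key, values.next() + "text");
        }
    }

    public void close() throws IOException {
        mos.close();
    }
}

The getCollector("text", reporter) calls are what produce the text-m-00000 and text-r-00000 part files the test counts.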

Example 7 with RunningJob

Use of org.apache.hadoop.mapred.RunningJob in project hadoop by apache.

The class TestMultipleOutputs, method _testMultipleOutputs.

protected void _testMultipleOutputs(boolean withCounters) throws Exception {
    Path inDir = getDir(IN_DIR);
    Path outDir = getDir(OUT_DIR);
    JobConf conf = createJobConf();
    FileSystem fs = FileSystem.get(conf);
    DataOutputStream file = fs.create(new Path(inDir, "part-0"));
    file.writeBytes("a\nb\n\nc\nd\ne");
    file.close();
    file = fs.create(new Path(inDir, "part-1"));
    file.writeBytes("a\nb\n\nc\nd\ne");
    file.close();
    conf.setJobName("mo");
    conf.setInputFormat(TextInputFormat.class);
    conf.setOutputKeyClass(LongWritable.class);
    conf.setOutputValueClass(Text.class);
    conf.setMapOutputKeyClass(LongWritable.class);
    conf.setMapOutputValueClass(Text.class);
    conf.setOutputFormat(TextOutputFormat.class);
    MultipleOutputs.addNamedOutput(conf, "text", TextOutputFormat.class, LongWritable.class, Text.class);
    MultipleOutputs.addMultiNamedOutput(conf, "sequence", SequenceFileOutputFormat.class, LongWritable.class, Text.class);
    MultipleOutputs.setCountersEnabled(conf, withCounters);
    conf.setMapperClass(MOMap.class);
    conf.setReducerClass(MOReduce.class);
    FileInputFormat.setInputPaths(conf, inDir);
    FileOutputFormat.setOutputPath(conf, outDir);
    JobClient jc = new JobClient(conf);
    RunningJob job = jc.submitJob(conf);
    // submitJob() returns immediately; poll until the job finishes
    while (!job.isComplete()) {
        Thread.sleep(100);
    }
    // assert number of named output part files
    int namedOutputCount = 0;
    FileStatus[] statuses = fs.listStatus(outDir);
    for (FileStatus status : statuses) {
        String name = status.getPath().getName();
        if (name.equals("text-m-00000") || name.equals("text-m-00001")
            || name.equals("text-r-00000")
            || name.equals("sequence_A-m-00000") || name.equals("sequence_A-m-00001")
            || name.equals("sequence_B-m-00000") || name.equals("sequence_B-m-00001")
            || name.equals("sequence_B-r-00000") || name.equals("sequence_C-r-00000")) {
            namedOutputCount++;
        }
    }
    assertEquals(9, namedOutputCount);
    // assert TextOutputFormat files correctness
    BufferedReader reader = new BufferedReader(new InputStreamReader(fs.open(new Path(FileOutputFormat.getOutputPath(conf), "text-r-00000"))));
    int count = 0;
    String line = reader.readLine();
    while (line != null) {
        assertTrue(line.endsWith("text"));
        line = reader.readLine();
        count++;
    }
    reader.close();
    assertFalse(count == 0);
    // assert SequenceOutputFormat files correctness
    SequenceFile.Reader seqReader = new SequenceFile.Reader(fs, new Path(FileOutputFormat.getOutputPath(conf), "sequence_B-r-00000"), conf);
    assertEquals(LongWritable.class, seqReader.getKeyClass());
    assertEquals(Text.class, seqReader.getValueClass());
    count = 0;
    LongWritable key = new LongWritable();
    Text value = new Text();
    while (seqReader.next(key, value)) {
        assertEquals("sequence", value.toString());
        count++;
    }
    seqReader.close();
    assertFalse(count == 0);
    Counters.Group counters = job.getCounters().getGroup(MultipleOutputs.class.getName());
    if (!withCounters) {
        assertEquals(0, counters.size());
    } else {
        assertEquals(4, counters.size());
        assertEquals(4, counters.getCounter("text"));
        assertEquals(2, counters.getCounter("sequence_A"));
        assertEquals(4, counters.getCounter("sequence_B"));
        assertEquals(2, counters.getCounter("sequence_C"));
    }
}
Also used : Path(org.apache.hadoop.fs.Path) FileStatus(org.apache.hadoop.fs.FileStatus) InputStreamReader(java.io.InputStreamReader) DataOutputStream(java.io.DataOutputStream) BufferedReader(java.io.BufferedReader) Text(org.apache.hadoop.io.Text) JobClient(org.apache.hadoop.mapred.JobClient) SequenceFile(org.apache.hadoop.io.SequenceFile) FileSystem(org.apache.hadoop.fs.FileSystem) RunningJob(org.apache.hadoop.mapred.RunningJob) Counters(org.apache.hadoop.mapred.Counters) LongWritable(org.apache.hadoop.io.LongWritable) JobConf(org.apache.hadoop.mapred.JobConf)
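
MOMap and MOReduce are likewise not shown. The sequence_A, sequence_B, and sequence_C part files come from the multi named output registered via addMultiNamedOutput: the two-argument getCollector takes a base name plus a "multi" part, and the two are joined with an underscore in the file name. A hedged sketch of the reduce side only (the real method bodies differ):

import java.io.IOException;
import java.util.Iterator;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reducer;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.mapred.lib.MultipleOutputs;

@SuppressWarnings("unchecked")
class MOReduce implements Reducer<LongWritable, Text, LongWritable, Text> {

    private MultipleOutputs mos;

    public void configure(JobConf conf) {
        mos = new MultipleOutputs(conf);
    }

    public void reduce(LongWritable key, Iterator<Text> values, OutputCollector<LongWritable, Text> output, Reporter reporter) throws IOException {
        while (values.hasNext()) {
            Text value = values.next();
            // single named output: produces text-r-NNNNN
            mos.getCollector("text", reporter).collect(key, new Text("text"));
            // multi named output: the second argument is appended to the base
            // name, producing sequence_B-r-NNNNN and sequence_C-r-NNNNN
            mos.getCollector("sequence", "B", reporter).collect(key, new Text("sequence"));
            mos.getCollector("sequence", "C", reporter).collect(key, new Text("sequence"));
            output.collect(key, value);
        }
    }

    public void close() throws IOException {
        mos.close();
    }
}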

Example 8 with RunningJob

Use of org.apache.hadoop.mapred.RunningJob in project hadoop by apache.

The class TestPipes, method runProgram.

static void runProgram(MiniMRCluster mr, MiniDFSCluster dfs, Path program,
        Path inputPath, Path outputPath, int numMaps, int numReduces,
        String[] expectedResults, JobConf conf) throws IOException {
    Path wordExec = new Path("testing/bin/application");
    JobConf job = null;
    if (conf == null) {
        job = mr.createJobConf();
    } else {
        job = new JobConf(conf);
    }
    job.setNumMapTasks(numMaps);
    job.setNumReduceTasks(numReduces);
    {
        FileSystem fs = dfs.getFileSystem();
        fs.delete(wordExec.getParent(), true);
        fs.copyFromLocalFile(program, wordExec);
        Submitter.setExecutable(job, fs.makeQualified(wordExec).toString());
        Submitter.setIsJavaRecordReader(job, true);
        Submitter.setIsJavaRecordWriter(job, true);
        FileInputFormat.setInputPaths(job, inputPath);
        FileOutputFormat.setOutputPath(job, outputPath);
        RunningJob rJob = null;
        if (numReduces == 0) {
            // jobSubmit() returns without waiting, so poll for completion
            rJob = Submitter.jobSubmit(job);
            while (!rJob.isComplete()) {
                try {
                    Thread.sleep(1000);
                } catch (InterruptedException ie) {
                    throw new RuntimeException(ie);
                }
            }
        } else {
            // runJob() blocks until the job completes
            rJob = Submitter.runJob(job);
        }
        assertTrue("pipes job failed", rJob.isSuccessful());
        Counters counters = rJob.getCounters();
        Counters.Group wordCountCounters = counters.getGroup("WORDCOUNT");
        int numCounters = 0;
        for (Counter c : wordCountCounters) {
            System.out.println(c);
            ++numCounters;
        }
        assertTrue("No counters found!", (numCounters > 0));
    }
    List<String> results = new ArrayList<String>();
    for (Path p : FileUtil.stat2Paths(dfs.getFileSystem().listStatus(outputPath, new Utils.OutputFileUtils.OutputFilesFilter()))) {
        results.add(MapReduceTestUtil.readOutput(p, job));
    }
    assertEquals("number of reduces is wrong", expectedResults.length, results.size());
    for (int i = 0; i < results.size(); i++) {
        assertEquals("pipes program " + program + " output " + i + " wrong", expectedResults[i], results.get(i));
    }
}
Also used : Path(org.apache.hadoop.fs.Path) ArrayList(java.util.ArrayList) Counter(org.apache.hadoop.mapred.Counters.Counter) Utils(org.apache.hadoop.mapred.Utils) StringUtils(org.apache.hadoop.util.StringUtils) FileSystem(org.apache.hadoop.fs.FileSystem) RunningJob(org.apache.hadoop.mapred.RunningJob) Counters(org.apache.hadoop.mapred.Counters) JobConf(org.apache.hadoop.mapred.JobConf)
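
The polling loops in Examples 6 through 8 spin until isComplete() returns true and will hang forever if a job stalls. A defensive variant with a deadline is sketched below; the helper class, its name, and the timeout policy are ours, not part of the Hadoop API:

import java.io.IOException;
import org.apache.hadoop.mapred.RunningJob;

final class JobPolling {

    // poll until the job completes, but kill it and fail once the
    // deadline passes
    static void waitForCompletion(RunningJob job, long timeoutMillis)
            throws IOException, InterruptedException {
        long deadline = System.currentTimeMillis() + timeoutMillis;
        while (!job.isComplete()) {
            if (System.currentTimeMillis() > deadline) {
                job.killJob();
                throw new IOException("job " + job.getID() + " timed out");
            }
            Thread.sleep(1000);
        }
    }
}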

Example 9 with RunningJob

Use of org.apache.hadoop.mapred.RunningJob in project hive by apache.

The class HadoopJobExecHelper, method killRunningJobs.

public static void killRunningJobs() {
    synchronized (runningJobs) {
        for (RunningJob rj : runningJobs) {
            try {
                System.err.println("killing job with: " + rj.getID());
                rj.killJob();
            } catch (Exception e) {
                // log and keep going so the remaining jobs still get killed
                LOG.warn("Failed to kill job", e);
                System.err.println("Failed to kill job: " + rj.getID());
            }
        }
    }
}
Also used : RunningJob(org.apache.hadoop.mapred.RunningJob) LockException(org.apache.hadoop.hive.ql.lockmgr.LockException) IOException(java.io.IOException)
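
A method like killRunningJobs is typically wired into process shutdown so that a client-side exit does not leave MapReduce jobs orphaned on the cluster. One plausible wiring, as a sketch only (Hive registers its hooks through its own shutdown-hook machinery, not necessarily like this):

// hypothetical registration: kill any tracked jobs when the JVM exits
Runtime.getRuntime().addShutdownHook(
    new Thread(HadoopJobExecHelper::killRunningJobs, "kill-running-jobs"));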

Example 10 with RunningJob

Use of org.apache.hadoop.mapred.RunningJob in project hbase by apache.

The class TestMultiTableSnapshotInputFormat, method runJob.

@Override
protected void runJob(String jobName, Configuration c, List<Scan> scans) throws IOException, InterruptedException, ClassNotFoundException {
    JobConf job = new JobConf(TEST_UTIL.getConfiguration());
    job.setJobName(jobName);
    job.setMapperClass(Mapper.class);
    job.setReducerClass(Reducer.class);
    TableMapReduceUtil.initMultiTableSnapshotMapperJob(getSnapshotScanMapping(scans), Mapper.class, ImmutableBytesWritable.class, ImmutableBytesWritable.class, job, true, restoreDir);
    TableMapReduceUtil.addDependencyJars(job);
    // a single reducer produces the final "first" and "last" keys
    job.setNumReduceTasks(1);
    FileOutputFormat.setOutputPath(job, new Path(job.getJobName()));
    LOG.info("Started " + job.getJobName());
    // JobClient.runJob() blocks until the job finishes, so the
    // waitForCompletion() call below returns immediately
    RunningJob runningJob = JobClient.runJob(job);
    runningJob.waitForCompletion();
    assertTrue(runningJob.isSuccessful());
    LOG.info("After map/reduce completion - job " + jobName);
}
Also used : Path(org.apache.hadoop.fs.Path) RunningJob(org.apache.hadoop.mapred.RunningJob) JobConf(org.apache.hadoop.mapred.JobConf)
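
Taken together, the examples on this page reduce to one RunningJob lifecycle: submit through a JobClient, watch progress, then inspect the result. A condensed sketch of that pattern (the class name and progress printing are ours; job configuration is elided):

import java.io.IOException;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.RunningJob;

final class RunningJobLifecycle {

    static void submitAndWatch(JobConf conf) throws IOException, InterruptedException {
        JobClient jc = new JobClient(conf);
        // non-blocking submit; the RunningJob handle is the client-side view of the job
        RunningJob job = jc.submitJob(conf);
        while (!job.isComplete()) {
            // mapProgress()/reduceProgress() return fractions in [0, 1]
            System.out.printf("map %.0f%% reduce %.0f%%%n",
                    job.mapProgress() * 100, job.reduceProgress() * 100);
            Thread.sleep(1000);
        }
        if (!job.isSuccessful()) {
            throw new IOException("job " + job.getID() + " failed");
        }
    }
}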

Aggregations

RunningJob (org.apache.hadoop.mapred.RunningJob): 61
JobConf (org.apache.hadoop.mapred.JobConf): 45
Path (org.apache.hadoop.fs.Path): 35
FileSystem (org.apache.hadoop.fs.FileSystem): 24
JobClient (org.apache.hadoop.mapred.JobClient): 20
IOException (java.io.IOException): 15
Counters (org.apache.hadoop.mapred.Counters): 14
Group (org.apache.hadoop.mapred.Counters.Group): 13
DMLConfig (org.apache.sysml.conf.DMLConfig): 13
Configuration (org.apache.hadoop.conf.Configuration): 7
MatrixChar_N_ReducerGroups (org.apache.sysml.runtime.matrix.mapred.MRJobConfiguration.MatrixChar_N_ReducerGroups): 7
DataOutputStream (java.io.DataOutputStream): 6
File (java.io.File): 5
FSDataOutputStream (org.apache.hadoop.fs.FSDataOutputStream): 5
FileStatus (org.apache.hadoop.fs.FileStatus): 5
Text (org.apache.hadoop.io.Text): 5
DMLRuntimeException (org.apache.sysml.runtime.DMLRuntimeException): 5
InputInfo (org.apache.sysml.runtime.matrix.data.InputInfo): 5
Test (org.junit.Test): 5
URI (java.net.URI): 4