Example 36 with Job

use of org.apache.hadoop.mapreduce.Job in project hadoop by apache.

the class TestMRFieldSelection method launch.

public static void launch() throws Exception {
    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(conf);
    int numOfInputLines = 10;
    Path outDir = new Path(testDir, "output_for_field_selection_test");
    Path inDir = new Path(testDir, "input_for_field_selection_test");
    StringBuffer inputData = new StringBuffer();
    StringBuffer expectedOutput = new StringBuffer();
    constructInputOutputData(inputData, expectedOutput, numOfInputLines);
    conf.set(FieldSelectionHelper.DATA_FIELD_SEPERATOR, "-");
    conf.set(FieldSelectionHelper.MAP_OUTPUT_KEY_VALUE_SPEC, "6,5,1-3:0-");
    conf.set(FieldSelectionHelper.REDUCE_OUTPUT_KEY_VALUE_SPEC, ":4,3,2,1,0,0-");
    Job job = MapReduceTestUtil.createJob(conf, inDir, outDir, 1, 1, inputData.toString());
    job.setMapperClass(FieldSelectionMapper.class);
    job.setReducerClass(FieldSelectionReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    job.setNumReduceTasks(1);
    job.waitForCompletion(true);
    assertTrue("Job Failed!", job.isSuccessful());
    //
    // Finally, compare the job output with the expected output.
    //
    String outdata = MapReduceTestUtil.readOutput(outDir, conf);
    assertEquals("Outputs doesnt match.", expectedOutput.toString(), outdata);
    fs.delete(outDir, true);
}
Also used : Path(org.apache.hadoop.fs.Path) Configuration(org.apache.hadoop.conf.Configuration) FileSystem(org.apache.hadoop.fs.FileSystem) Job(org.apache.hadoop.mapreduce.Job)
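
The key/value specs above use the FieldSelection grammar: the text before the ':' selects the key fields and the text after it selects the value fields, as comma-separated indices with '-' ranges (an open range such as "0-" runs to the last field). The hand-rolled sketch below shows how "6,5,1-3:0-" reads an eight-field line; it illustrates the spec only and is not the FieldSelectionHelper implementation.

public class FieldSpecDemo {
    public static void main(String[] args) {
        // Eight fields separated by "-", matching DATA_FIELD_SEPERATOR above.
        String line = "a-b-c-d-e-f-g-h";
        String[] fields = line.split("-");
        // Key spec "6,5,1-3": field 6, field 5, then the closed range 1..3.
        String key = String.join("-", fields[6], fields[5],
                fields[1], fields[2], fields[3]);
        // Value spec "0-": the open range from field 0 through the last field.
        String value = String.join("-", fields);
        // Prints "g-f-b-c-d<TAB>a-b-c-d-e-f-g-h".
        System.out.println(key + "\t" + value);
    }
}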

Example 37 with Job

use of org.apache.hadoop.mapreduce.Job in project hadoop by apache.

the class TestMultithreadedMapper method run.

private void run(boolean ioEx, boolean rtEx) throws Exception {
    String localPathRoot = System.getProperty("test.build.data", "/tmp");
    Path inDir = new Path(localPathRoot, "testing/mt/input");
    Path outDir = new Path(localPathRoot, "testing/mt/output");
    Configuration conf = createJobConf();
    if (ioEx) {
        conf.setBoolean("multithreaded.ioException", true);
    }
    if (rtEx) {
        conf.setBoolean("multithreaded.runtimeException", true);
    }
    Job job = MapReduceTestUtil.createJob(conf, inDir, outDir, 1, 1);
    job.setJobName("mt");
    job.setMapperClass(MultithreadedMapper.class);
    MultithreadedMapper.setMapperClass(job, IDMap.class);
    MultithreadedMapper.setNumberOfThreads(job, 2);
    job.setReducerClass(Reducer.class);
    job.waitForCompletion(true);
    if (job.isSuccessful()) {
        assertFalse(ioEx || rtEx);
    } else {
        assertTrue(ioEx || rtEx);
    }
}
Also used : Path(org.apache.hadoop.fs.Path) Configuration(org.apache.hadoop.conf.Configuration) Job(org.apache.hadoop.mapreduce.Job)
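
Outside the test harness the wiring is the same: install MultithreadedMapper as the job's mapper class, then tell it which single-threaded mapper to run and with how many threads. Below is a minimal driver sketch; MtDriver, PassThroughMapper, the paths, and the thread count of 4 are illustrative choices, and the wrapped mapper's map() must be thread-safe.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.map.MultithreadedMapper;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class MtDriver {
    // Hypothetical mapper; with no overrides it inherits the identity map(),
    // which is trivially thread-safe.
    public static class PassThroughMapper
            extends Mapper<LongWritable, Text, LongWritable, Text> {
    }

    public static void main(String[] args) throws Exception {
        Job job = Job.getInstance(new Configuration(), "mt-demo");
        job.setJarByClass(MtDriver.class);
        // Run PassThroughMapper inside MultithreadedMapper, 4 threads per map task.
        job.setMapperClass(MultithreadedMapper.class);
        MultithreadedMapper.setMapperClass(job, PassThroughMapper.class);
        MultithreadedMapper.setNumberOfThreads(job, 4);
        job.setOutputKeyClass(LongWritable.class);
        job.setOutputValueClass(Text.class);
        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}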

Example 38 with Job

use of org.apache.hadoop.mapreduce.Job in project hadoop by apache.

the class TestJobOutputCommitter method testKilledJob.

// Run a job that gets stuck in the mapper, then kill it.
private void testKilledJob(String fileName, Class<? extends OutputFormat> output, String[] exclude) throws Exception {
    Path outDir = getNewOutputDir();
    Job job = MapReduceTestUtil.createKillJob(conf, outDir, inDir);
    job.setOutputFormatClass(output);
    job.submit();
    // wait for the setup to be completed
    while (job.setupProgress() != 1.0f) {
        UtilsForTests.waitFor(100);
    }
    // kill the job
    job.killJob();
    assertFalse("Job did not get kill", job.waitForCompletion(true));
    if (fileName != null) {
        Path testFile = new Path(outDir, fileName);
        assertTrue("File " + testFile + " missing for job " + job.getJobID(), fs.exists(testFile));
    }
    // check that none of the files from the exclude set are present
    for (String ex : exclude) {
        Path file = new Path(outDir, ex);
        assertFalse("File " + file + " should not be present for killed job " + job.getJobID(), fs.exists(file));
    }
}
Also used : Path(org.apache.hadoop.fs.Path) Job(org.apache.hadoop.mapreduce.Job)
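
The submit-then-kill pattern generalizes beyond tests: Job.submit() returns immediately, so a client can poll progress and call killJob() on its own schedule. A sketch with an arbitrary 30-second deadline follows; runWithTimeout is a hypothetical helper, not a Hadoop API.

// Submit asynchronously, poll for completion, and kill on timeout.
static boolean runWithTimeout(Job job, long timeoutMs) throws Exception {
    // submit() returns immediately, unlike waitForCompletion().
    job.submit();
    long deadline = System.currentTimeMillis() + timeoutMs;
    while (!job.isComplete() && System.currentTimeMillis() < deadline) {
        Thread.sleep(500);
    }
    if (!job.isComplete()) {
        // Ask the framework to kill the job, as the test above does.
        job.killJob();
    }
    // Returns false for a killed job, true for a successful one.
    return job.waitForCompletion(true);
}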

Example 39 with Job

use of org.apache.hadoop.mapreduce.Job in project hadoop by apache.

the class TestJobOutputCommitter method testSuccessfulJob.

// run a job with 1 map and let it run to completion
private void testSuccessfulJob(String filename, Class<? extends OutputFormat> output, String[] exclude) throws Exception {
    Path outDir = getNewOutputDir();
    Job job = MapReduceTestUtil.createJob(conf, inDir, outDir, 1, 0);
    job.setOutputFormatClass(output);
    assertTrue("Job failed!", job.waitForCompletion(true));
    Path testFile = new Path(outDir, filename);
    assertTrue("Done file missing for job " + job.getJobID(), fs.exists(testFile));
    // check that none of the files from the exclude set are present
    for (String ex : exclude) {
        Path file = new Path(outDir, ex);
        assertFalse("File " + file + " should not be present for successful job " + job.getJobID(), fs.exists(file));
    }
}
Also used : Path(org.apache.hadoop.fs.Path) Job(org.apache.hadoop.mapreduce.Job)
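
The filename and exclude sets these tests receive correspond to the committer's marker files. With the default FileOutputCommitter (org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter), a successful job leaves a zero-length _SUCCESS marker and no _temporary scratch directory in the output path. A sketch of that check; outputCommitted is a hypothetical helper.

static boolean outputCommitted(Configuration conf, Path outDir) throws IOException {
    FileSystem fs = outDir.getFileSystem(conf);
    // "_SUCCESS": zero-length marker written when the job commits successfully.
    Path success = new Path(outDir, FileOutputCommitter.SUCCEEDED_FILE_NAME);
    // "_temporary": the committer's scratch space, removed on commit or abort.
    Path temporary = new Path(outDir, "_temporary");
    return fs.exists(success) && !fs.exists(temporary);
}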

Example 40 with Job

use of org.apache.hadoop.mapreduce.Job in project hadoop by apache.

the class TestMapReduceAggregates method launch.

public static void launch() throws Exception {
    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(conf);
    int numOfInputLines = 20;
    String baseDir = System.getProperty("test.build.data", "build/test/data");
    Path OUTPUT_DIR = new Path(baseDir + "/output_for_aggregates_test");
    Path INPUT_DIR = new Path(baseDir + "/input_for_aggregates_test");
    String inputFile = "input.txt";
    fs.delete(INPUT_DIR, true);
    fs.mkdirs(INPUT_DIR);
    fs.delete(OUTPUT_DIR, true);
    StringBuffer inputData = new StringBuffer();
    StringBuffer expectedOutput = new StringBuffer();
    expectedOutput.append("max\t19\n");
    expectedOutput.append("min\t1\n");
    FSDataOutputStream fileOut = fs.create(new Path(INPUT_DIR, inputFile));
    for (int i = 1; i < numOfInputLines; i++) {
        expectedOutput.append("count_").append(idFormat.format(i));
        expectedOutput.append("\t").append(i).append("\n");
        inputData.append(idFormat.format(i));
        for (int j = 1; j < i; j++) {
            inputData.append(" ").append(idFormat.format(i));
        }
        inputData.append("\n");
    }
    expectedOutput.append("value_as_string_max\t9\n");
    expectedOutput.append("value_as_string_min\t1\n");
    expectedOutput.append("uniq_count\t15\n");
    fileOut.write(inputData.toString().getBytes("utf-8"));
    fileOut.close();
    System.out.println("inputData:");
    System.out.println(inputData.toString());
    conf.setInt(ValueAggregatorJobBase.DESCRIPTOR_NUM, 1);
    conf.set(ValueAggregatorJobBase.DESCRIPTOR + ".0", "UserDefined,org.apache.hadoop.mapreduce.lib.aggregate.AggregatorTests");
    conf.setLong(UniqValueCount.MAX_NUM_UNIQUE_VALUES, 14);
    Job job = Job.getInstance(conf);
    FileInputFormat.setInputPaths(job, INPUT_DIR);
    job.setInputFormatClass(TextInputFormat.class);
    FileOutputFormat.setOutputPath(job, OUTPUT_DIR);
    job.setOutputFormatClass(TextOutputFormat.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    job.setNumReduceTasks(1);
    job.setMapperClass(ValueAggregatorMapper.class);
    job.setReducerClass(ValueAggregatorReducer.class);
    job.setCombinerClass(ValueAggregatorCombiner.class);
    job.waitForCompletion(true);
    assertTrue(job.isSuccessful());
    //
    // Finally, compare the job output with the expected output. Only the
    // leading portion (the expected output's length) is checked.
    //
    String outdata = MapReduceTestUtil.readOutput(OUTPUT_DIR, conf);
    System.out.println("full out data:");
    System.out.println(outdata);
    outdata = outdata.substring(0, expectedOutput.toString().length());
    assertEquals(expectedOutput.toString(), outdata);
    fs.delete(OUTPUT_DIR, true);
    fs.delete(INPUT_DIR, true);
}
Also used : Path(org.apache.hadoop.fs.Path) Configuration(org.apache.hadoop.conf.Configuration) FileSystem(org.apache.hadoop.fs.FileSystem) FSDataOutputStream(org.apache.hadoop.fs.FSDataOutputStream) Job(org.apache.hadoop.mapreduce.Job)
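
The aggregate framework drives everything from the descriptor named by ValueAggregatorJobBase.DESCRIPTOR: for each input record the descriptor emits (aggregatorType:id, value) pairs, and stock aggregators such as LongValueSum or UniqValueCount fold them in the combiner and reducer. The AggregatorTests class referenced above is not shown here, so WordStatsDescriptor below is a hypothetical descriptor illustrating the same contract via ValueAggregatorBaseDescriptor.

import java.util.ArrayList;
import java.util.Map.Entry;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.lib.aggregate.ValueAggregatorBaseDescriptor;

public class WordStatsDescriptor extends ValueAggregatorBaseDescriptor {
    @Override
    public ArrayList<Entry<Text, Text>> generateKeyValPairs(Object key, Object val) {
        ArrayList<Entry<Text, Text>> out = new ArrayList<Entry<Text, Text>>();
        for (String word : val.toString().split("\\s+")) {
            // "LongValueSum:count_<word>" accumulates a per-word total of 1s.
            out.add(generateEntry(LONG_VALUE_SUM, "count_" + word, ONE));
            // "UniqValueCount:uniq" counts distinct words across the input.
            out.add(generateEntry(UNIQ_VALUE_COUNT, "uniq", new Text(word)));
        }
        return out;
    }
}

Wiring it in mirrors the test configuration: conf.set(ValueAggregatorJobBase.DESCRIPTOR + ".0", "UserDefined," + WordStatsDescriptor.class.getName()).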

Aggregations

Job (org.apache.hadoop.mapreduce.Job) 886
Path (org.apache.hadoop.fs.Path) 498
Configuration (org.apache.hadoop.conf.Configuration) 434
Test (org.junit.Test) 259
IOException (java.io.IOException) 135
FileSystem (org.apache.hadoop.fs.FileSystem) 128
File (java.io.File) 77
InputSplit (org.apache.hadoop.mapreduce.InputSplit) 58
ArrayList (java.util.ArrayList) 55
TaskAttemptContext (org.apache.hadoop.mapreduce.TaskAttemptContext) 55
Scan (org.apache.hadoop.hbase.client.Scan) 45
FileStatus (org.apache.hadoop.fs.FileStatus) 44
NutchJob (org.apache.nutch.util.NutchJob) 43
JobConf (org.apache.hadoop.mapred.JobConf) 42
Text (org.apache.hadoop.io.Text) 39
NutchConfiguration (org.apache.nutch.util.NutchConfiguration) 36
HBaseConfiguration (org.apache.hadoop.hbase.HBaseConfiguration) 35
JobContext (org.apache.hadoop.mapreduce.JobContext) 35
GenericOptionsParser (org.apache.hadoop.util.GenericOptionsParser) 35
CommandLine (org.apache.commons.cli.CommandLine) 33