Example 96 with Job

Use of org.apache.hadoop.mapreduce.Job in project hbase by apache.

From the class HFileSplitterJob, method createSubmittableJob.

/**
   * Sets up the actual job.
   * @param args The command line parameters.
   * @return The newly created job.
   * @throws IOException When setting up the job fails.
   */
public Job createSubmittableJob(String[] args) throws IOException {
    Configuration conf = getConf();
    String inputDirs = args[0];
    String tabName = args[1];
    conf.setStrings(TABLES_KEY, tabName);
    Job job = Job.getInstance(conf, conf.get(JOB_NAME_CONF_KEY, NAME + "_" + EnvironmentEdgeManager.currentTime()));
    job.setJarByClass(HFileSplitterJob.class);
    FileInputFormat.addInputPaths(job, inputDirs);
    job.setInputFormatClass(HFileInputFormat.class);
    job.setMapOutputKeyClass(ImmutableBytesWritable.class);
    String hfileOutPath = conf.get(BULK_OUTPUT_CONF_KEY);
    if (hfileOutPath != null) {
        LOG.debug("add incremental job :" + hfileOutPath + " from " + inputDirs);
        TableName tableName = TableName.valueOf(tabName);
        job.setMapperClass(HFileCellMapper.class);
        job.setReducerClass(KeyValueSortReducer.class);
        Path outputDir = new Path(hfileOutPath);
        FileOutputFormat.setOutputPath(job, outputDir);
        job.setMapOutputValueClass(KeyValue.class);
        try (Connection conn = ConnectionFactory.createConnection(conf);
            Table table = conn.getTable(tableName);
            RegionLocator regionLocator = conn.getRegionLocator(tableName)) {
            HFileOutputFormat2.configureIncrementalLoad(job, table.getTableDescriptor(), regionLocator);
        }
        LOG.debug("success configuring load incremental job");
        TableMapReduceUtil.addDependencyJars(job.getConfiguration(), com.google.common.base.Preconditions.class);
    } else {
        throw new IOException("No bulk output directory specified");
    }
    return job;
}
Also used: Path (org.apache.hadoop.fs.Path), TableName (org.apache.hadoop.hbase.TableName), RegionLocator (org.apache.hadoop.hbase.client.RegionLocator), Table (org.apache.hadoop.hbase.client.Table), Configuration (org.apache.hadoop.conf.Configuration), HBaseConfiguration (org.apache.hadoop.hbase.HBaseConfiguration), Connection (org.apache.hadoop.hbase.client.Connection), IOException (java.io.IOException), Job (org.apache.hadoop.mapreduce.Job)
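
Once this job finishes, the HFiles it wrote still have to be handed off to the region servers. A minimal, hypothetical follow-up sketch using the standard LoadIncrementalHFiles tool (the method name completeBulkLoad and its parameters are illustrative, not part of the excerpt above):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Admin;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.client.RegionLocator;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.mapreduce.LoadIncrementalHFiles;

// Hypothetical follow-up step: pushes the HFiles produced by the job above
// into the target table once the job has completed successfully.
static void completeBulkLoad(Configuration conf, TableName tableName, Path hfileOutPath) throws Exception {
    try (Connection conn = ConnectionFactory.createConnection(conf);
        Admin admin = conn.getAdmin();
        Table table = conn.getTable(tableName);
        RegionLocator locator = conn.getRegionLocator(tableName)) {
        new LoadIncrementalHFiles(conf).doBulkLoad(hfileOutPath, admin, table, locator);
    }
}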

Example 97 with Job

Use of org.apache.hadoop.mapreduce.Job in project hbase by apache.

From the class HFileSplitterJob, method run.

@Override
public int run(String[] args) throws Exception {
    if (args.length < 2) {
        usage("Wrong number of arguments: " + args.length);
        System.exit(-1);
    }
    Job job = createSubmittableJob(args);
    return job.waitForCompletion(true) ? 0 : 1;
}
Also used: Job (org.apache.hadoop.mapreduce.Job)
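
Since the run(String[]) override above follows the Tool contract, a minimal, hypothetical entry point (not part of the excerpt, and assuming HFileSplitterJob does implement Tool) would launch the class through ToolRunner:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.util.ToolRunner;

public static void main(String[] args) throws Exception {
    // HBaseConfiguration.create() picks up hbase-site.xml from the classpath;
    // ToolRunner strips generic options (-D, -conf, ...) before calling run().
    Configuration conf = HBaseConfiguration.create();
    System.exit(ToolRunner.run(conf, new HFileSplitterJob(), args));
}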

Example 98 with Job

Use of org.apache.hadoop.mapreduce.Job in project hadoop by apache.

From the class TotalOrderPartitioner, method setConf.

/**
   * Read in the partition file and build indexing data structures.
   * If the keytype is {@link org.apache.hadoop.io.BinaryComparable} and
   * <tt>total.order.partitioner.natural.order</tt> is not false, a trie
   * of the first <tt>total.order.partitioner.max.trie.depth</tt> (default 2) + 1 bytes
   * will be built. Otherwise, keys will be located using a binary search of
   * the partition keyset using the {@link org.apache.hadoop.io.RawComparator}
   * defined for this job. The input file must be sorted with the same
   * comparator and contain {@link Job#getNumReduceTasks()} - 1 keys.
   */
// keytype is read from the conf, not static
@SuppressWarnings("unchecked")
public void setConf(Configuration conf) {
    try {
        this.conf = conf;
        String parts = getPartitionFile(conf);
        final Path partFile = new Path(parts);
        final FileSystem fs = DEFAULT_PATH.equals(parts)
            // the default path is assumed to be in the DistributedCache
            ? FileSystem.getLocal(conf)
            : partFile.getFileSystem(conf);
        Job job = Job.getInstance(conf);
        Class<K> keyClass = (Class<K>) job.getMapOutputKeyClass();
        K[] splitPoints = readPartitions(fs, partFile, keyClass, conf);
        if (splitPoints.length != job.getNumReduceTasks() - 1) {
            throw new IOException("Wrong number of partitions in keyset");
        }
        RawComparator<K> comparator = (RawComparator<K>) job.getSortComparator();
        for (int i = 0; i < splitPoints.length - 1; ++i) {
            if (comparator.compare(splitPoints[i], splitPoints[i + 1]) >= 0) {
                throw new IOException("Split points are out of order");
            }
        }
        boolean natOrder = conf.getBoolean(NATURAL_ORDER, true);
        if (natOrder && BinaryComparable.class.isAssignableFrom(keyClass)) {
            // Trie depth limit: large, but not huge.
            partitions = buildTrie((BinaryComparable[]) splitPoints, 0,
                splitPoints.length, new byte[0], conf.getInt(MAX_TRIE_DEPTH, 200));
        } else {
            partitions = new BinarySearchNode(splitPoints, comparator);
        }
    } catch (IOException e) {
        throw new IllegalArgumentException("Can't read partitions file", e);
    }
}
Also used: Path (org.apache.hadoop.fs.Path), BinaryComparable (org.apache.hadoop.io.BinaryComparable), IOException (java.io.IOException), RawComparator (org.apache.hadoop.io.RawComparator), FileSystem (org.apache.hadoop.fs.FileSystem), Job (org.apache.hadoop.mapreduce.Job)
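
In practice, the partition file that setConf() reads is produced ahead of time by sampling the job's input. A minimal sketch using the standard InputSampler (the key/value types, sample parameters, and file path below are illustrative assumptions):

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.partition.InputSampler;
import org.apache.hadoop.mapreduce.lib.partition.TotalOrderPartitioner;

static void configureTotalOrder(Job job) throws Exception {
    // Tell the partitioner (and setConf() above) where the split points live.
    TotalOrderPartitioner.setPartitionFile(job.getConfiguration(), new Path("/tmp/_partitions.lst"));
    // Randomly sample ~10% of input keys, at most 10000 samples from up to
    // 10 splits, then write numReduceTasks - 1 sorted split points.
    InputSampler.writePartitionFile(job, new InputSampler.RandomSampler<Text, Text>(0.1, 10000, 10));
    job.setPartitionerClass(TotalOrderPartitioner.class);
}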

Example 99 with Job

Use of org.apache.hadoop.mapreduce.Job in project hadoop by apache.

From the class TestClientRedirect, method testRedirect.

@Test
public void testRedirect() throws Exception {
    Configuration conf = new YarnConfiguration();
    conf.set(MRConfig.FRAMEWORK_NAME, MRConfig.YARN_FRAMEWORK_NAME);
    conf.set(YarnConfiguration.RM_ADDRESS, RMADDRESS);
    conf.set(JHAdminConfig.MR_HISTORY_ADDRESS, HSHOSTADDRESS);
    // Start the RM.
    RMService rmService = new RMService("test");
    rmService.init(conf);
    rmService.start();
    // Start the AM.
    AMService amService = new AMService();
    amService.init(conf);
    amService.start(conf);
    // Start the HS.
    HistoryService historyService = new HistoryService();
    historyService.init(conf);
    historyService.start(conf);
    LOG.info("services started");
    Cluster cluster = new Cluster(conf);
    org.apache.hadoop.mapreduce.JobID jobID = new org.apache.hadoop.mapred.JobID("201103121733", 1);
    org.apache.hadoop.mapreduce.Counters counters = cluster.getJob(jobID).getCounters();
    validateCounters(counters);
    Assert.assertTrue(amContact);
    LOG.info("Sleeping for 5 seconds before stop for" + " the client socket to not get EOF immediately..");
    Thread.sleep(5000);
    // Bring down the AM service.
    amService.stop();
    LOG.info("Sleeping for 5 seconds after stop for" + " the server to exit cleanly..");
    Thread.sleep(5000);
    amRestarting = true;
    // Same client; results are now returned from the fake (not-started) job.
    counters = cluster.getJob(jobID).getCounters();
    Assert.assertEquals(0, counters.countCounters());
    Job job = cluster.getJob(jobID);
    org.apache.hadoop.mapreduce.TaskID taskId = new org.apache.hadoop.mapreduce.TaskID(jobID, TaskType.MAP, 0);
    TaskAttemptID tId = new TaskAttemptID(taskId, 0);
    // Invoke all methods to check that no exception is thrown.
    job.killJob();
    job.killTask(tId);
    job.failTask(tId);
    job.getTaskCompletionEvents(0, 100);
    job.getStatus();
    job.getTaskDiagnostics(tId);
    job.getTaskReports(TaskType.MAP);
    job.getTrackingURL();
    amRestarting = false;
    amService = new AMService();
    amService.init(conf);
    amService.start(conf);
    // Reset the AM contact flag before the next request.
    amContact = false;
    counters = cluster.getJob(jobID).getCounters();
    validateCounters(counters);
    Assert.assertTrue(amContact);
    // Stop the AM. It is not even restarting. So it should be treated as
    // completed.
    amService.stop();
    // Same client
    counters = cluster.getJob(jobID).getCounters();
    validateCounters(counters);
    Assert.assertTrue(hsContact);
    rmService.stop();
    historyService.stop();
}
Also used: Configuration (org.apache.hadoop.conf.Configuration), YarnConfiguration (org.apache.hadoop.yarn.conf.YarnConfiguration), TaskAttemptID (org.apache.hadoop.mapreduce.TaskAttemptID), Cluster (org.apache.hadoop.mapreduce.Cluster), Job (org.apache.hadoop.mapreduce.Job), Test (org.junit.Test)

Example 100 with Job

Use of org.apache.hadoop.mapreduce.Job in project hadoop by apache.

From the class DistCp, method createJob.

/**
   * Creates the Job object for submission, with all required configuration.
   *
   * @return Reference to the job object.
   * @throws IOException if the job cannot be set up.
   */
private Job createJob() throws IOException {
    String jobName = "distcp";
    String userChosenName = getConf().get(JobContext.JOB_NAME);
    if (userChosenName != null) {
        jobName += ": " + userChosenName;
    }
    Job job = Job.getInstance(getConf());
    job.setJobName(jobName);
    job.setInputFormatClass(DistCpUtils.getStrategy(getConf(), inputOptions));
    job.setJarByClass(CopyMapper.class);
    configureOutputFormat(job);
    job.setMapperClass(CopyMapper.class);
    job.setNumReduceTasks(0);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);
    job.setOutputFormatClass(CopyOutputFormat.class);
    job.getConfiguration().set(JobContext.MAP_SPECULATIVE, "false");
    job.getConfiguration().set(JobContext.NUM_MAPS, String.valueOf(inputOptions.getMaxMaps()));
    inputOptions.appendToConf(job.getConfiguration());
    return job;
}
Also used: Job (org.apache.hadoop.mapreduce.Job)
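
The job above is deliberately map-only (setNumReduceTasks(0)) with map speculation disabled, so each source file is copied by exactly one task. DistCp's actual submission path is not shown in this excerpt, but a hypothetical caller inside the same class could drive it with the standard blocking API:

// Sketch only: assumes we are inside DistCp, where createJob() is visible.
Job job = createJob();
// 'true' streams progress to the client; returns once the job terminates.
boolean succeeded = job.waitForCompletion(true);
System.exit(succeeded ? 0 : 1);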

Aggregations

Job (org.apache.hadoop.mapreduce.Job): 886
Path (org.apache.hadoop.fs.Path): 498
Configuration (org.apache.hadoop.conf.Configuration): 434
Test (org.junit.Test): 259
IOException (java.io.IOException): 135
FileSystem (org.apache.hadoop.fs.FileSystem): 128
File (java.io.File): 77
InputSplit (org.apache.hadoop.mapreduce.InputSplit): 58
ArrayList (java.util.ArrayList): 55
TaskAttemptContext (org.apache.hadoop.mapreduce.TaskAttemptContext): 55
Scan (org.apache.hadoop.hbase.client.Scan): 45
FileStatus (org.apache.hadoop.fs.FileStatus): 44
NutchJob (org.apache.nutch.util.NutchJob): 43
JobConf (org.apache.hadoop.mapred.JobConf): 42
Text (org.apache.hadoop.io.Text): 39
NutchConfiguration (org.apache.nutch.util.NutchConfiguration): 36
HBaseConfiguration (org.apache.hadoop.hbase.HBaseConfiguration): 35
JobContext (org.apache.hadoop.mapreduce.JobContext): 35
GenericOptionsParser (org.apache.hadoop.util.GenericOptionsParser): 35
CommandLine (org.apache.commons.cli.CommandLine): 33