
Example 11 with RunningJob

Uses org.apache.hadoop.mapred.RunningJob, from project hbase by apache.

Class TestTableInputFormat, method testInputFormat:

void testInputFormat(Class<? extends InputFormat> clazz) throws IOException {
    Configuration conf = UTIL.getConfiguration();
    final JobConf job = new JobConf(conf);
    job.setInputFormat(clazz);
    job.setOutputFormat(NullOutputFormat.class);
    job.setMapperClass(ExampleVerifier.class);
    // map-only job; verification happens entirely through the counters below
    job.setNumReduceTasks(0);
    LOG.debug("submitting job.");
    final RunningJob run = JobClient.runJob(job);
    assertTrue("job failed!", run.isSuccessful());
    assertEquals("Saw the wrong number of instances of the filtered-for row.", 2,
        run.getCounters().findCounter(TestTableInputFormat.class.getName() + ":row", "aaa").getCounter());
    assertEquals("Saw any instances of the filtered out row.", 0,
        run.getCounters().findCounter(TestTableInputFormat.class.getName() + ":row", "bbb").getCounter());
    assertEquals("Saw the wrong number of instances of columnA.", 1,
        run.getCounters().findCounter(TestTableInputFormat.class.getName() + ":family", "columnA").getCounter());
    assertEquals("Saw the wrong number of instances of columnB.", 1,
        run.getCounters().findCounter(TestTableInputFormat.class.getName() + ":family", "columnB").getCounter());
    assertEquals("Saw the wrong count of values for the filtered-for row.", 2,
        run.getCounters().findCounter(TestTableInputFormat.class.getName() + ":value", "value aaa").getCounter());
    assertEquals("Saw the wrong count of values for the filtered-out row.", 0,
        run.getCounters().findCounter(TestTableInputFormat.class.getName() + ":value", "value bbb").getCounter());
}
Also used: Configuration (org.apache.hadoop.conf.Configuration), RunningJob (org.apache.hadoop.mapred.RunningJob), JobConf (org.apache.hadoop.mapred.JobConf)
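
The counters asserted above are filled in on the map side. Below is a minimal sketch, not the actual ExampleVerifier, of how a mapred-API mapper can feed them through the Reporter callback; the counter group names mirror the assertions above:

public static class CountingMapper extends MapReduceBase
        implements Mapper<ImmutableBytesWritable, Result, NullWritable, NullWritable> {
    @Override
    public void map(ImmutableBytesWritable key, Result value,
            OutputCollector<NullWritable, NullWritable> output, Reporter reporter)
            throws IOException {
        // one count per row key seen ("aaa", "bbb", ...)
        reporter.incrCounter(TestTableInputFormat.class.getName() + ":row",
            Bytes.toString(key.get()), 1);
        for (Cell cell : value.listCells()) {
            // one count per column family and one per cell value
            reporter.incrCounter(TestTableInputFormat.class.getName() + ":family",
                Bytes.toString(CellUtil.cloneFamily(cell)), 1);
            reporter.incrCounter(TestTableInputFormat.class.getName() + ":value",
                Bytes.toString(CellUtil.cloneValue(cell)), 1);
        }
    }
}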

Example 12 with RunningJob

Uses org.apache.hadoop.mapred.RunningJob, from project hbase by apache.

Class TestTableMapReduce, method runTestOnTable:

@Override
protected void runTestOnTable(Table table) throws IOException {
    JobConf jobConf = null;
    try {
        LOG.info("Before map/reduce startup");
        jobConf = new JobConf(UTIL.getConfiguration(), TestTableMapReduce.class);
        jobConf.setJobName("process column contents");
        jobConf.setNumReduceTasks(1);
        TableMapReduceUtil.initTableMapJob(table.getName().getNameAsString(),
            Bytes.toString(INPUT_FAMILY), ProcessContentsMapper.class,
            ImmutableBytesWritable.class, Put.class, jobConf);
        TableMapReduceUtil.initTableReduceJob(table.getName().getNameAsString(),
            IdentityTableReduce.class, jobConf);
        LOG.info("Started " + table.getName());
        RunningJob job = JobClient.runJob(jobConf);
        assertTrue(job.isSuccessful());
        LOG.info("After map/reduce completion");
        // verify map-reduce results
        verify(table.getName());
    } finally {
        if (jobConf != null) {
            FileUtil.fullyDelete(new File(jobConf.get("hadoop.tmp.dir")));
        }
    }
}
Also used: RunningJob (org.apache.hadoop.mapred.RunningJob), JobConf (org.apache.hadoop.mapred.JobConf), File (java.io.File)
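
ProcessContentsMapper is wired in above through initTableMapJob. A hedged sketch of what such a TableMap mapper can look like under the old mapred API; the class name is hypothetical, and INPUT_FAMILY/OUTPUT_FAMILY are assumed to be byte[] constants from the test's base class:

public static class ReverseContentsMapper extends MapReduceBase
        implements TableMap<ImmutableBytesWritable, Put> {
    @Override
    public void map(ImmutableBytesWritable key, Result value,
            OutputCollector<ImmutableBytesWritable, Put> output, Reporter reporter)
            throws IOException {
        // read the cell stored under INPUT_FAMILY and reverse its bytes
        byte[] original = value.getValue(INPUT_FAMILY, null);
        byte[] reversed = new byte[original.length];
        for (int i = 0; i < original.length; i++) {
            reversed[i] = original[original.length - 1 - i];
        }
        // emit a Put keyed by the same row
        Put put = new Put(key.get());
        put.addColumn(OUTPUT_FAMILY, null, reversed);
        output.collect(key, put);
    }
}

IdentityTableReduce then writes each emitted Put back to the table unchanged, which is what the verify step inspects.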

Example 13 with RunningJob

Uses org.apache.hadoop.mapred.RunningJob, from project hbase by apache.

Class TestTableMapReduceUtil, method shoudBeValidMapReduceWithPartitionerEvaluation:

@Test
@SuppressWarnings("deprecation")
public void shoudBeValidMapReduceWithPartitionerEvaluation() throws IOException {
    Configuration cfg = UTIL.getConfiguration();
    JobConf jobConf = new JobConf(cfg);
    try {
        jobConf.setJobName("process row task");
        jobConf.setNumReduceTasks(2);
        TableMapReduceUtil.initTableMapJob(TABLE_NAME, new String(COLUMN_FAMILY),
            ClassificatorMapper.class, ImmutableBytesWritable.class, Put.class, jobConf);
        TableMapReduceUtil.initTableReduceJob(TABLE_NAME, ClassificatorRowReduce.class,
            jobConf, HRegionPartitioner.class);
        RunningJob job = JobClient.runJob(jobConf);
        assertTrue(job.isSuccessful());
    } finally {
        // jobConf is always non-null here; the guard is kept from the original test
        if (jobConf != null) {
            FileUtil.fullyDelete(new File(jobConf.get("hadoop.tmp.dir")));
        }
    }
}
Also used: Configuration (org.apache.hadoop.conf.Configuration), RunningJob (org.apache.hadoop.mapred.RunningJob), JobConf (org.apache.hadoop.mapred.JobConf), File (java.io.File), Test (org.junit.Test)
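
The only difference from the previous example is the HRegionPartitioner argument, which routes each mapper output row to the reduce task that owns the row's region, so reduce output lines up with region boundaries (more reducers than regions buys nothing). For reference, a custom partitioner under the old mapred API implements the same contract; the class below is a hypothetical illustration, not HRegionPartitioner's actual logic:

public static class FirstBytePartitioner
        implements Partitioner<ImmutableBytesWritable, Put> {
    @Override
    public int getPartition(ImmutableBytesWritable key, Put value, int numPartitions) {
        // spread rows across reducers by the first byte of the row key
        return (key.get()[0] & 0xFF) % numPartitions;
    }
    @Override
    public void configure(JobConf job) {
        // no configuration needed
    }
}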

Example 14 with RunningJob

Uses org.apache.hadoop.mapred.RunningJob, from project hbase by apache.

Class TestTableSnapshotInputFormat, method doTestWithMapReduce:

// this is also called by the IntegrationTestTableSnapshotInputFormat
public static void doTestWithMapReduce(HBaseTestingUtility util, TableName tableName,
        String snapshotName, byte[] startRow, byte[] endRow, Path tableDir,
        int numRegions, int expectedNumSplits, boolean shutdownCluster) throws Exception {
    // create the table and snapshot
    createTableAndSnapshot(util, tableName, snapshotName, startRow, endRow, numRegions);
    if (shutdownCluster) {
        util.shutdownMiniHBaseCluster();
    }
    try {
        // create the job
        JobConf jobConf = new JobConf(util.getConfiguration());
        jobConf.setJarByClass(util.getClass());
        org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil.addDependencyJarsForClasses(
            jobConf, TestTableSnapshotInputFormat.class);
        TableMapReduceUtil.initTableSnapshotMapJob(snapshotName, COLUMNS,
            TestTableSnapshotMapper.class, ImmutableBytesWritable.class,
            NullWritable.class, jobConf, true, tableDir);
        jobConf.setReducerClass(TestTableSnapshotInputFormat.TestTableSnapshotReducer.class);
        jobConf.setNumReduceTasks(1);
        jobConf.setOutputFormat(NullOutputFormat.class);
        RunningJob job = JobClient.runJob(jobConf);
        Assert.assertTrue(job.isSuccessful());
    } finally {
        if (!shutdownCluster) {
            util.getAdmin().deleteSnapshot(snapshotName);
            util.deleteTable(tableName);
        }
    }
}
Also used: RunningJob (org.apache.hadoop.mapred.RunningJob), JobConf (org.apache.hadoop.mapred.JobConf)
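
createTableAndSnapshot is a test helper; the snapshot step at its core can be sketched with the plain Admin API (a sketch, assuming the table has already been created and populated):

Admin admin = util.getAdmin();
admin.flush(tableName);                  // persist memstore contents to HFiles
admin.snapshot(snapshotName, tableName); // take a point-in-time, read-only snapshot

initTableSnapshotMapJob then restores the snapshot into tableDir and the job reads the restored region files directly, which is why the test can shut down the mini cluster before running the job.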

Example 15 with RunningJob

Uses org.apache.hadoop.mapred.RunningJob, from project mongo-hadoop by mongodb.

Class MongoTool, method runMapredJob:

private int runMapredJob(final Configuration conf) {
    final JobConf job = new JobConf(conf, getClass());
    /*
     * Any arguments specified with "-D <property>=<value>" on the CLI are
     * picked up and set here; they override any XML-level values. Note that
     * the space after -D is required: without it the option is consumed by
     * the JVM itself and never reaches the job configuration.
     */
    // TODO - Do we need to set job name somehow more specifically?
    // This may or may not be correct/sane
    job.setJarByClass(getClass());
    final Class<? extends org.apache.hadoop.mapred.Mapper> mapper = MapredMongoConfigUtil.getMapper(conf);
    if (LOG.isDebugEnabled()) {
        LOG.debug("Mapper Class: " + mapper);
        LOG.debug("Input URI: " + conf.get(MapredMongoConfigUtil.INPUT_URI));
    }
    job.setMapperClass(mapper);
    Class<? extends org.apache.hadoop.mapred.Reducer> combiner = MapredMongoConfigUtil.getCombiner(conf);
    if (combiner != null) {
        job.setCombinerClass(combiner);
    }
    job.setReducerClass(MapredMongoConfigUtil.getReducer(conf));
    job.setOutputFormat(MapredMongoConfigUtil.getOutputFormat(conf));
    job.setOutputKeyClass(MapredMongoConfigUtil.getOutputKey(conf));
    job.setOutputValueClass(MapredMongoConfigUtil.getOutputValue(conf));
    job.setInputFormat(MapredMongoConfigUtil.getInputFormat(conf));
    Class<?> mapOutputKeyClass = MapredMongoConfigUtil.getMapperOutputKey(conf);
    Class<?> mapOutputValueClass = MapredMongoConfigUtil.getMapperOutputValue(conf);
    if (mapOutputKeyClass != null) {
        job.setMapOutputKeyClass(mapOutputKeyClass);
    }
    if (mapOutputValueClass != null) {
        job.setMapOutputValueClass(mapOutputValueClass);
    }
    /*
     * Determines whether the job runs verbosely (e.g. prints debug output).
     * Only applies to foreground jobs.
     */
    final boolean verbose = MapredMongoConfigUtil.isJobVerbose(conf);
    /*
     * Run the job in the foreground (wait for completion) or in the background?
     */
    final boolean background = MapredMongoConfigUtil.isJobBackground(conf);
    try {
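        // note: JobClient.runJob submits the job and then blocks, polling until
        // the job completes (it throws an IOException if the job fails)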
        RunningJob runningJob = JobClient.runJob(job);
        if (background) {
            LOG.info("Setting up and running MapReduce job in background.");
            return 0;
        } else {
            LOG.info("Setting up and running MapReduce job in foreground, will wait for results.  {Verbose? " + verbose + "}");
            runningJob.waitForCompletion();
            return 0;
        }
    } catch (final Exception e) {
        LOG.error("Exception while executing job... ", e);
        return 1;
    }
}
Also used: RunningJob (org.apache.hadoop.mapred.RunningJob), JobConf (org.apache.hadoop.mapred.JobConf), IOException (java.io.IOException)
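
For context, MongoTool is a Hadoop Tool, so a driver normally hands it to ToolRunner, which applies the -D overrides discussed in the comment above before runMapredJob ever sees the Configuration. A minimal, hypothetical driver:

public static void main(String[] args) throws Exception {
    // ToolRunner parses generic options (-D, -conf, -fs, ...) into the tool's
    // Configuration and then invokes run(), which for old-API (mapred) jobs
    // ends up in the runMapredJob shown above
    System.exit(ToolRunner.run(new MongoTool(), args));
}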

Aggregations

RunningJob (org.apache.hadoop.mapred.RunningJob): 61 usages
JobConf (org.apache.hadoop.mapred.JobConf): 45 usages
Path (org.apache.hadoop.fs.Path): 35 usages
FileSystem (org.apache.hadoop.fs.FileSystem): 24 usages
JobClient (org.apache.hadoop.mapred.JobClient): 20 usages
IOException (java.io.IOException): 15 usages
Counters (org.apache.hadoop.mapred.Counters): 14 usages
Group (org.apache.hadoop.mapred.Counters.Group): 13 usages
DMLConfig (org.apache.sysml.conf.DMLConfig): 13 usages
Configuration (org.apache.hadoop.conf.Configuration): 7 usages
MatrixChar_N_ReducerGroups (org.apache.sysml.runtime.matrix.mapred.MRJobConfiguration.MatrixChar_N_ReducerGroups): 7 usages
DataOutputStream (java.io.DataOutputStream): 6 usages
File (java.io.File): 5 usages
FSDataOutputStream (org.apache.hadoop.fs.FSDataOutputStream): 5 usages
FileStatus (org.apache.hadoop.fs.FileStatus): 5 usages
Text (org.apache.hadoop.io.Text): 5 usages
DMLRuntimeException (org.apache.sysml.runtime.DMLRuntimeException): 5 usages
InputInfo (org.apache.sysml.runtime.matrix.data.InputInfo): 5 usages
Test (org.junit.Test): 5 usages
URI (java.net.URI): 4 usages