Search in sources :

Example 26 with JobClient

use of org.apache.hadoop.mapred.JobClient in project hive by apache.

the class ColumnTruncateTask method execute.

/**
 * Start a new map-reduce job to do the truncation, almost the same as ExecDriver.
 */
@Override
public int execute(DriverContext driverContext) {
    HiveConf.setVar(job, HiveConf.ConfVars.HIVEINPUTFORMAT, BucketizedHiveInputFormat.class.getName());
    success = true;
    HiveFileFormatUtils.prepareJobOutput(job);
    job.setOutputFormat(HiveOutputFormatImpl.class);
    job.setMapperClass(work.getMapperClass());
    Context ctx = driverContext.getCtx();
    boolean ctxCreated = false;
    try {
        if (ctx == null) {
            ctx = new Context(job);
            ctxCreated = true;
        }
    } catch (IOException e) {
        e.printStackTrace();
        console.printError("Error launching map-reduce job", "\n" + org.apache.hadoop.util.StringUtils.stringifyException(e));
        return 5;
    }
    job.setMapOutputKeyClass(NullWritable.class);
    job.setMapOutputValueClass(NullWritable.class);
    if (work.getNumMapTasks() != null) {
        job.setNumMapTasks(work.getNumMapTasks());
    }
    // zero reducers
    job.setNumReduceTasks(0);
    if (work.getMinSplitSize() != null) {
        HiveConf.setLongVar(job, HiveConf.ConfVars.MAPREDMINSPLITSIZE, work.getMinSplitSize().longValue());
    }
    if (work.getInputformat() != null) {
        HiveConf.setVar(job, HiveConf.ConfVars.HIVEINPUTFORMAT, work.getInputformat());
    }
    String inpFormat = HiveConf.getVar(job, HiveConf.ConfVars.HIVEINPUTFORMAT);
    LOG.info("Using " + inpFormat);
    try {
        job.setInputFormat(JavaUtils.loadClass(inpFormat));
    } catch (ClassNotFoundException e) {
        throw new RuntimeException(e.getMessage(), e);
    }
    Path outputPath = this.work.getOutputDir();
    Path tempOutPath = Utilities.toTempPath(outputPath);
    try {
        FileSystem fs = tempOutPath.getFileSystem(job);
        if (!fs.exists(tempOutPath)) {
            fs.mkdirs(tempOutPath);
        }
    } catch (IOException e) {
        console.printError("Can't make path " + outputPath + " : " + e.getMessage());
        return 6;
    }
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(NullWritable.class);
    int returnVal = 0;
    RunningJob rj = null;
    boolean noName = StringUtils.isEmpty(job.get(MRJobConfig.JOB_NAME));
    String jobName = null;
    if (noName && this.getQueryPlan() != null) {
        int maxlen = conf.getIntVar(HiveConf.ConfVars.HIVEJOBNAMELENGTH);
        jobName = Utilities.abbreviate(this.getQueryPlan().getQueryStr(), maxlen - 6);
    }
    if (noName) {
        // This is for a special case to ensure unit tests pass
        job.set(MRJobConfig.JOB_NAME, jobName != null ? jobName : "JOB" + Utilities.randGen.nextInt());
    }
    try {
        addInputPaths(job, work);
        MapredWork mrWork = new MapredWork();
        mrWork.setMapWork(work);
        Utilities.setMapRedWork(job, mrWork, ctx.getMRTmpPath());
        // Remove the password from the conf file so that the job tracker
        // does not show it in its logs.
        String pwd = HiveConf.getVar(job, HiveConf.ConfVars.METASTOREPWD);
        if (pwd != null) {
            HiveConf.setVar(job, HiveConf.ConfVars.METASTOREPWD, "HIVE");
        }
        JobClient jc = new JobClient(job);
        String addedJars = Utilities.getResourceFiles(job, SessionState.ResourceType.JAR);
        if (!addedJars.isEmpty()) {
            job.set("tmpjars", addedJars);
        }
        // Make this client wait if the job tracker is not behaving well.
        Throttle.checkJobTracker(job, LOG);
        // Finally SUBMIT the JOB!
        rj = jc.submitJob(job);
        this.jobID = rj.getJobID();
        returnVal = jobExecHelper.progress(rj, jc, ctx);
        success = (returnVal == 0);
    } catch (Exception e) {
        e.printStackTrace();
        setException(e);
        String mesg = " with exception '" + Utilities.getNameMessage(e) + "'";
        if (rj != null) {
            mesg = "Ended Job = " + rj.getJobID() + mesg;
        } else {
            mesg = "Job Submission failed" + mesg;
        }
        // Has to use full name to make sure it does not conflict with
        // org.apache.commons.lang.StringUtils
        console.printError(mesg, "\n" + org.apache.hadoop.util.StringUtils.stringifyException(e));
        success = false;
        returnVal = 1;
    } finally {
        try {
            if (ctxCreated) {
                ctx.clear();
            }
            if (rj != null) {
                if (returnVal != 0) {
                    rj.killJob();
                }
            }
            ColumnTruncateMapper.jobClose(outputPath, success, job, console, work.getDynPartCtx(), null);
        } catch (Exception e) {
            LOG.warn("Failed while cleaning up ", e);
        } finally {
            HadoopJobExecHelper.runningJobs.remove(rj);
        }
    }
    return (returnVal);
}
Also used : Context(org.apache.hadoop.hive.ql.Context) DriverContext(org.apache.hadoop.hive.ql.DriverContext) CompilationOpContext(org.apache.hadoop.hive.ql.CompilationOpContext) Path(org.apache.hadoop.fs.Path) BucketizedHiveInputFormat(org.apache.hadoop.hive.ql.io.BucketizedHiveInputFormat) IOException(java.io.IOException) JobClient(org.apache.hadoop.mapred.JobClient) MapredWork(org.apache.hadoop.hive.ql.plan.MapredWork) FileSystem(org.apache.hadoop.fs.FileSystem) RunningJob(org.apache.hadoop.mapred.RunningJob)
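
Most of the code above is Hive bookkeeping (job naming, temp output paths, scrubbing the metastore password). The underlying JobClient flow is submit, poll, and kill on early exit. A minimal sketch of that flow, assuming a JobConf that is already fully configured (mapper, input/output formats, paths); Hive's progress reporting and temp-path handling are omitted:

import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.RunningJob;

public class SubmitAndMonitor {
    public static int run(JobConf job) throws Exception {
        JobClient jc = new JobClient(job);
        // Asynchronous submit; returns a handle to the running job.
        RunningJob rj = jc.submitJob(job);
        try {
            // Simple polling loop in place of Hive's jobExecHelper.progress().
            while (!rj.isComplete()) {
                Thread.sleep(1000L);
            }
            return rj.isSuccessful() ? 0 : 1;
        } finally {
            // Kill the job if we are bailing out before it finished.
            if (!rj.isComplete()) {
                rj.killJob();
            }
        }
    }
}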

Example 27 with JobClient

use of org.apache.hadoop.mapred.JobClient in project hive by apache.

the class CompactorMR method launchCompactionJob.

private void launchCompactionJob(JobConf job, Path baseDir, CompactionType compactionType, StringableList dirsToSearch, List<AcidUtils.ParsedDelta> parsedDeltas, int curDirNumber, int obsoleteDirNumber, HiveConf hiveConf, TxnStore txnHandler, long id) throws IOException {
    job.setBoolean(IS_MAJOR, compactionType == CompactionType.MAJOR);
    if (dirsToSearch == null) {
        dirsToSearch = new StringableList();
    }
    StringableList deltaDirs = new StringableList();
    long minTxn = Long.MAX_VALUE;
    long maxTxn = Long.MIN_VALUE;
    for (AcidUtils.ParsedDelta delta : parsedDeltas) {
        LOG.debug("Adding delta " + delta.getPath() + " to directories to search");
        dirsToSearch.add(delta.getPath());
        deltaDirs.add(delta.getPath());
        minTxn = Math.min(minTxn, delta.getMinTransaction());
        maxTxn = Math.max(maxTxn, delta.getMaxTransaction());
    }
    if (baseDir != null)
        job.set(BASE_DIR, baseDir.toString());
    job.set(DELTA_DIRS, deltaDirs.toString());
    job.set(DIRS_TO_SEARCH, dirsToSearch.toString());
    job.setLong(MIN_TXN, minTxn);
    job.setLong(MAX_TXN, maxTxn);
    if (hiveConf.getBoolVar(HiveConf.ConfVars.HIVE_IN_TEST)) {
        mrJob = job;
    }
    LOG.info("Submitting " + compactionType + " compaction job '" + job.getJobName() + "' to " + job.getQueueName() + " queue.  " + "(current delta dirs count=" + curDirNumber + ", obsolete delta dirs count=" + obsoleteDirNumber + ". TxnIdRange[" + minTxn + "," + maxTxn + "]");
    RunningJob rj = new JobClient(job).submitJob(job);
    LOG.info("Submitted compaction job '" + job.getJobName() + "' with jobID=" + rj.getID() + " compaction ID=" + id);
    txnHandler.setHadoopJobId(rj.getID().toString(), id);
    rj.waitForCompletion();
}
Also used : RunningJob(org.apache.hadoop.mapred.RunningJob) JobClient(org.apache.hadoop.mapred.JobClient) AcidUtils(org.apache.hadoop.hive.ql.io.AcidUtils)
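
The compaction launcher above submits the job and blocks on waitForCompletion() without checking the outcome. A caller that needs to surface failure can add a success check; a small sketch, assuming a fully configured JobConf and using the stock RunningJob accessors:

import java.io.IOException;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.RunningJob;

public class SubmitAndWait {
    public static void runCompactionStyleJob(JobConf job) throws IOException {
        RunningJob rj = new JobClient(job).submitJob(job);
        // Blocks until the job finishes, whether it succeeds or fails.
        rj.waitForCompletion();
        if (!rj.isSuccessful()) {
            // The example above does not check the result; here we surface it.
            throw new IOException("Job " + rj.getID() + " failed: " + rj.getFailureInfo());
        }
    }
}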

Example 28 with JobClient

use of org.apache.hadoop.mapred.JobClient in project hadoop-book by elephantscale.

the class RandomWriter method run.

/**
     * This is the main routine for launching a distributed random write job.
     * It runs 10 maps per node, and each map writes 1 GB of data to a DFS
     * file. The reduce doesn't do anything.
     *
     * @throws IOException
     */
public int run(String[] args) throws Exception {
    if (args.length == 0) {
        System.out.println("Usage: writer <out-dir>");
        ToolRunner.printGenericCommandUsage(System.out);
        return -1;
    }
    Path outDir = new Path(args[0]);
    JobConf job = new JobConf(getConf());
    job.setJarByClass(RandomWriter.class);
    job.setJobName("random-writer");
    FileOutputFormat.setOutputPath(job, outDir);
    job.setOutputKeyClass(BytesWritable.class);
    job.setOutputValueClass(BytesWritable.class);
    job.setInputFormat(RandomInputFormat.class);
    job.setMapperClass(Map.class);
    job.setReducerClass(IdentityReducer.class);
    job.setOutputFormat(SequenceFileOutputFormat.class);
    JobClient client = new JobClient(job);
    ClusterStatus cluster = client.getClusterStatus();
    int numMapsPerHost = job.getInt("test.randomwriter.maps_per_host", 10);
    long numBytesToWritePerMap = job.getLong("test.randomwrite.bytes_per_map", 1 * 1024 * 1024 * 1024);
    if (numBytesToWritePerMap == 0) {
        System.err.println("Cannot have test.randomwrite.bytes_per_map set to 0");
        return -2;
    }
    long totalBytesToWrite = job.getLong("test.randomwrite.total_bytes", numMapsPerHost * numBytesToWritePerMap * cluster.getTaskTrackers());
    int numMaps = (int) (totalBytesToWrite / numBytesToWritePerMap);
    if (numMaps == 0 && totalBytesToWrite > 0) {
        numMaps = 1;
        job.setLong("test.randomwrite.bytes_per_map", totalBytesToWrite);
    }
    job.setNumMapTasks(numMaps);
    System.out.println("Running " + numMaps + " maps.");
    // reducer NONE
    job.setNumReduceTasks(0);
    Date startTime = new Date();
    System.out.println("Job started: " + startTime);
    JobClient.runJob(job);
    Date endTime = new Date();
    System.out.println("Job ended: " + endTime);
    System.out.println("The job took " + (endTime.getTime() - startTime.getTime()) / 1000 + " seconds.");
    return 0;
}
Also used : Path(org.apache.hadoop.fs.Path) JobConf(org.apache.hadoop.mapred.JobConf) JobClient(org.apache.hadoop.mapred.JobClient) ClusterStatus(org.apache.hadoop.mapred.ClusterStatus) Date(java.util.Date)
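
The reusable part of RandomWriter's setup is the sizing logic: ask the JobClient for a ClusterStatus and derive the map count from the number of TaskTrackers. A condensed sketch of that idiom, using the same 10 maps/host and 1 GB/map figures as the example (these are the example's defaults, not Hadoop's):

import java.io.IOException;
import org.apache.hadoop.mapred.ClusterStatus;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;

public class MapCountSizing {
    public static void sizeJob(JobConf job) throws IOException {
        JobClient client = new JobClient(job);
        ClusterStatus cluster = client.getClusterStatus();
        int mapsPerHost = 10;                              // example default, as above
        long bytesPerMap = 1024L * 1024 * 1024;            // 1 GB per map, as above
        // Scale the total volume with the cluster size, then size the maps.
        long totalBytes = (long) mapsPerHost * cluster.getTaskTrackers() * bytesPerMap;
        int numMaps = (int) (totalBytes / bytesPerMap);
        job.setNumMapTasks(Math.max(numMaps, 1));          // never schedule zero maps
    }
}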

Example 29 with JobClient

use of org.apache.hadoop.mapred.JobClient in project hadoop-book by elephantscale.

the class InvertedIndex method main.

public static void main(String[] args) {
    JobClient client = new JobClient();
    JobConf conf = new JobConf(InvertedIndex.class);
    conf.setJobName("InvertedIndex");
    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(Text.class);
    FileInputFormat.addInputPath(conf, new Path("input"));
    FileOutputFormat.setOutputPath(conf, new Path("output"));
    conf.setMapperClass(InvertedIndexMapper.class);
    conf.setReducerClass(InvertedIndexReducer.class);
    client.setConf(conf);
    try {
        JobClient.runJob(conf);
    } catch (Exception e) {
        e.printStackTrace(System.out);
    }
}
Also used : Path(org.apache.hadoop.fs.Path) JobClient(org.apache.hadoop.mapred.JobClient) JobConf(org.apache.hadoop.mapred.JobConf) IOException(java.io.IOException)
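
JobClient.runJob(conf) blocks until the job finishes and throws IOException if it fails, so the catch block above is the only error handling needed. The returned RunningJob can still be inspected afterwards, for example to read counters; a hedged sketch, using Hadoop 2's built-in TaskCounter enum:

import java.io.IOException;
import org.apache.hadoop.mapred.Counters;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.RunningJob;
import org.apache.hadoop.mapreduce.TaskCounter;

public class RunAndInspect {
    public static void runAndReport(JobConf conf) throws IOException {
        // Blocks until completion; throws IOException if the job fails.
        RunningJob rj = JobClient.runJob(conf);
        Counters counters = rj.getCounters();
        // TaskCounter is the stock per-task counter group in Hadoop 2.
        long mapInputRecords = counters.getCounter(TaskCounter.MAP_INPUT_RECORDS);
        System.out.println("Map input records: " + mapInputRecords);
    }
}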

Example 30 with JobClient

use of org.apache.hadoop.mapred.JobClient in project hadoop by apache.

the class DummySocketFactory method testSocketFactory.

/**
   * Check that we can reach a NameNode or Resource Manager using a specific
   * socket factory
   */
@Test
public void testSocketFactory() throws IOException {
    // Create a standard mini-cluster
    Configuration sconf = new Configuration();
    MiniDFSCluster cluster = new MiniDFSCluster.Builder(sconf).numDataNodes(1).build();
    final int nameNodePort = cluster.getNameNodePort();
    // Get a reference to its DFS directly
    FileSystem fs = cluster.getFileSystem();
    Assert.assertTrue(fs instanceof DistributedFileSystem);
    DistributedFileSystem directDfs = (DistributedFileSystem) fs;
    Configuration cconf = getCustomSocketConfigs(nameNodePort);
    fs = FileSystem.get(cconf);
    Assert.assertTrue(fs instanceof DistributedFileSystem);
    DistributedFileSystem dfs = (DistributedFileSystem) fs;
    JobClient client = null;
    MiniMRYarnCluster miniMRYarnCluster = null;
    try {
        // This will test RPC to the NameNode only.
        // could we test Client-DataNode connections?
        Path filePath = new Path("/dir");
        Assert.assertFalse(directDfs.exists(filePath));
        Assert.assertFalse(dfs.exists(filePath));
        directDfs.mkdirs(filePath);
        Assert.assertTrue(directDfs.exists(filePath));
        Assert.assertTrue(dfs.exists(filePath));
        // This will test RPC to a Resource Manager
        fs = FileSystem.get(sconf);
        JobConf jobConf = new JobConf();
        FileSystem.setDefaultUri(jobConf, fs.getUri().toString());
        miniMRYarnCluster = initAndStartMiniMRYarnCluster(jobConf);
        JobConf jconf = new JobConf(miniMRYarnCluster.getConfig());
        jconf.set("hadoop.rpc.socket.factory.class.default", "org.apache.hadoop.ipc.DummySocketFactory");
        jconf.set(MRConfig.FRAMEWORK_NAME, MRConfig.YARN_FRAMEWORK_NAME);
        String rmAddress = jconf.get(YarnConfiguration.RM_ADDRESS);
        String[] split = rmAddress.split(":");
        jconf.set(YarnConfiguration.RM_ADDRESS, split[0] + ':' + (Integer.parseInt(split[1]) + 10));
        client = new JobClient(jconf);
        JobStatus[] jobs = client.jobsToComplete();
        Assert.assertTrue(jobs.length == 0);
    } finally {
        closeClient(client);
        closeDfs(dfs);
        closeDfs(directDfs);
        stopMiniMRYarnCluster(miniMRYarnCluster);
        shutdownDFSCluster(cluster);
    }
}
Also used : Path(org.apache.hadoop.fs.Path) MiniDFSCluster(org.apache.hadoop.hdfs.MiniDFSCluster) YarnConfiguration(org.apache.hadoop.yarn.conf.YarnConfiguration) Configuration(org.apache.hadoop.conf.Configuration) MiniMRYarnCluster(org.apache.hadoop.mapreduce.v2.MiniMRYarnCluster) DistributedFileSystem(org.apache.hadoop.hdfs.DistributedFileSystem) JobClient(org.apache.hadoop.mapred.JobClient) JobStatus(org.apache.hadoop.mapred.JobStatus) DistributedFileSystem(org.apache.hadoop.hdfs.DistributedFileSystem) FileSystem(org.apache.hadoop.fs.FileSystem) JobConf(org.apache.hadoop.mapred.JobConf) Test(org.junit.Test)
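
Outside a mini-cluster test, the same wiring boils down to a few configuration keys. A hedged sketch that points a JobClient at a YARN cluster through Hadoop's stock SocksSocketFactory; the proxy and ResourceManager addresses are placeholders, not values from the test above:

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.JobStatus;

public class SocketFactoryClient {
    public static JobStatus[] listJobs() throws IOException {
        Configuration conf = new Configuration();
        // Route all RPC connections through the stock SOCKS-based factory.
        conf.set("hadoop.rpc.socket.factory.class.default",
                 "org.apache.hadoop.net.SocksSocketFactory");
        conf.set("hadoop.socks.server", "localhost:1080");        // placeholder SOCKS proxy
        conf.set("mapreduce.framework.name", "yarn");
        conf.set("yarn.resourcemanager.address", "rmhost:8032");  // placeholder RM address
        JobClient client = new JobClient(new JobConf(conf));
        try {
            // A simple RPC round-trip to the ResourceManager, as in the test.
            return client.jobsToComplete();
        } finally {
            client.close();
        }
    }
}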

Aggregations

JobClient (org.apache.hadoop.mapred.JobClient): 47
Path (org.apache.hadoop.fs.Path): 25
RunningJob (org.apache.hadoop.mapred.RunningJob): 20
FileSystem (org.apache.hadoop.fs.FileSystem): 18
JobConf (org.apache.hadoop.mapred.JobConf): 18
IOException (java.io.IOException): 16
Configuration (org.apache.hadoop.conf.Configuration): 16
ClusterStatus (org.apache.hadoop.mapred.ClusterStatus): 11
Date (java.util.Date): 7
Text (org.apache.hadoop.io.Text): 6
Counters (org.apache.hadoop.mapred.Counters): 6
Test (org.junit.Test): 6
DataOutputStream (java.io.DataOutputStream): 5
FileStatus (org.apache.hadoop.fs.FileStatus): 5
BufferedReader (java.io.BufferedReader): 4
InputStreamReader (java.io.InputStreamReader): 4
CompilationOpContext (org.apache.hadoop.hive.ql.CompilationOpContext): 4
Context (org.apache.hadoop.hive.ql.Context): 4
DriverContext (org.apache.hadoop.hive.ql.DriverContext): 4
FileOutputFormat (org.apache.hadoop.mapreduce.lib.output.FileOutputFormat): 4