Example 31 with JobClient

Use of org.apache.hadoop.mapred.JobClient in project hadoop by apache.

Class TestMultithreadedMapRunner, method run.

private void run(boolean ioEx, boolean rtEx) throws Exception {
    Path inDir = new Path("testing/mt/input");
    Path outDir = new Path("testing/mt/output");
    // Hack for local FS that does not have the concept of a 'mounting point'
    if (isLocalFS()) {
        String localPathRoot = System.getProperty("test.build.data", "/tmp").replace(' ', '+');
        inDir = new Path(localPathRoot, inDir);
        outDir = new Path(localPathRoot, outDir);
    }
    JobConf conf = createJobConf();
    FileSystem fs = FileSystem.get(conf);
    fs.delete(outDir, true);
    if (!fs.mkdirs(inDir)) {
        throw new IOException("Mkdirs failed to create " + inDir.toString());
    }
    {
        // Write a small input file; the blank line between "b" and "c" yields an empty record.
        DataOutputStream file = fs.create(new Path(inDir, "part-0"));
        file.writeBytes("a\nb\n\nc\nd\ne");
        file.close();
    }
    conf.setJobName("mt");
    conf.setInputFormat(TextInputFormat.class);
    conf.setOutputKeyClass(LongWritable.class);
    conf.setOutputValueClass(Text.class);
    conf.setMapOutputKeyClass(LongWritable.class);
    conf.setMapOutputValueClass(Text.class);
    conf.setOutputFormat(TextOutputFormat.class);
    conf.setMapperClass(IDMap.class);
    conf.setReducerClass(IDReduce.class);
    FileInputFormat.setInputPaths(conf, inDir);
    FileOutputFormat.setOutputPath(conf, outDir);
    conf.setMapRunnerClass(MultithreadedMapRunner.class);
    conf.setInt(MultithreadedMapper.NUM_THREADS, 2);
    // These flags are read by the test mapper (IDMap) to throw an injected
    // IOException or RuntimeException from its map() calls.
    if (ioEx) {
        conf.setBoolean("multithreaded.ioException", true);
    }
    if (rtEx) {
        conf.setBoolean("multithreaded.runtimeException", true);
    }
    JobClient jc = new JobClient(conf);
    // submitJob() returns immediately, so poll until the job finishes.
    RunningJob job = jc.submitJob(conf);
    while (!job.isComplete()) {
        Thread.sleep(100);
    }
    // The job should fail if and only if an exception was injected.
    if (job.isSuccessful()) {
        assertFalse(ioEx || rtEx);
    } else {
        assertTrue(ioEx || rtEx);
    }
}
Also used: Path (org.apache.hadoop.fs.Path), DataOutputStream (java.io.DataOutputStream), FileSystem (org.apache.hadoop.fs.FileSystem), RunningJob (org.apache.hadoop.mapred.RunningJob), IOException (java.io.IOException), JobConf (org.apache.hadoop.mapred.JobConf), JobClient (org.apache.hadoop.mapred.JobClient)
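
A side note on the polling loop above: the old mapred API also offers the blocking helper JobClient.runJob(JobConf), which submits the job, reports progress, and returns only when the job finishes (throwing IOException if it fails). A minimal sketch, assuming conf is the fully configured JobConf from the example:

// Submit and block until completion instead of polling isComplete().
RunningJob job = JobClient.runJob(conf);
System.out.println("Job " + job.getID() + " successful: " + job.isSuccessful());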

Example 32 with JobClient

Use of org.apache.hadoop.mapred.JobClient in project hadoop by apache.

Class GenericMRLoadGenerator, method run.

public int run(String[] argv) throws Exception {
    Job job = Job.getInstance(getConf());
    job.setJarByClass(GenericMRLoadGenerator.class);
    job.setMapperClass(SampleMapper.class);
    job.setReducerClass(SampleReducer.class);
    if (!parseArgs(argv, job)) {
        return -1;
    }
    Configuration conf = job.getConfiguration();
    if (null == FileOutputFormat.getOutputPath(job)) {
        // No output dir? No writes
        job.setOutputFormatClass(NullOutputFormat.class);
    }
    if (0 == FileInputFormat.getInputPaths(job).length) {
        // No input dir? Generate random data
        System.err.println("No input path; ignoring InputFormat");
        confRandom(job);
    } else if (null != conf.getClass(INDIRECT_INPUT_FORMAT, null)) {
        // An IndirectInputFormat was specified: build the list of source files it will read.
        // (Note: jClient is created here but not used further in this branch.)
        JobClient jClient = new JobClient(conf);
        Path tmpDir = new Path("/tmp");
        Random r = new Random();
        // Random base-36 file name to avoid collisions under the shared /tmp directory.
        Path indirInputFile = new Path(tmpDir, Integer.toString(r.nextInt(Integer.MAX_VALUE), 36) + "_files");
        conf.set(INDIRECT_INPUT_FILE, indirInputFile.toString());
        SequenceFile.Writer writer = SequenceFile.createWriter(tmpDir.getFileSystem(conf), conf, indirInputFile, LongWritable.class, Text.class, SequenceFile.CompressionType.NONE);
        try {
            for (Path p : FileInputFormat.getInputPaths(job)) {
                FileSystem fs = p.getFileSystem(conf);
                Stack<Path> pathstack = new Stack<Path>();
                pathstack.push(p);
                // Iterative depth-first walk over each input tree.
                while (!pathstack.empty()) {
                    for (FileStatus stat : fs.listStatus(pathstack.pop())) {
                        if (stat.isDirectory()) {
                            // Skip framework-internal directories such as _logs.
                            if (!stat.getPath().getName().startsWith("_")) {
                                pathstack.push(stat.getPath());
                            }
                        } else {
                            // Sync marker before each record keeps the sequence file splittable.
                            writer.sync();
                            writer.append(new LongWritable(stat.getLen()), new Text(stat.getPath().toUri().toString()));
                        }
                    }
                }
            }
        } finally {
            writer.close();
        }
    }
    Date startTime = new Date();
    System.out.println("Job started: " + startTime);
    int ret = job.waitForCompletion(true) ? 0 : 1;
    Date endTime = new Date();
    System.out.println("Job ended: " + endTime);
    System.out.println("The job took " + (endTime.getTime() - startTime.getTime()) / 1000 + " seconds.");
    return ret;
}
Also used: Path (org.apache.hadoop.fs.Path), FileStatus (org.apache.hadoop.fs.FileStatus), Configuration (org.apache.hadoop.conf.Configuration), Text (org.apache.hadoop.io.Text), JobClient (org.apache.hadoop.mapred.JobClient), Date (java.util.Date), Stack (java.util.Stack), Random (java.util.Random), FileSystem (org.apache.hadoop.fs.FileSystem), LongWritable (org.apache.hadoop.io.LongWritable)
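
The indirect input file written above holds one <file length, file URI> pair per data file found. A minimal sketch of reading it back (an illustration, not part of the original example), assuming indirInputFile and conf are in scope as in the snippet:

// Read back the <LongWritable length, Text URI> records.
FileSystem tfs = indirInputFile.getFileSystem(conf);
SequenceFile.Reader reader = new SequenceFile.Reader(tfs, indirInputFile, conf);
try {
    LongWritable len = new LongWritable();
    Text uri = new Text();
    while (reader.next(len, uri)) {
        System.out.println(uri + "\t" + len.get() + " bytes");
    }
} finally {
    reader.close();
}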

Example 33 with JobClient

Use of org.apache.hadoop.mapred.JobClient in project hadoop by apache.

Class GenericMRLoadGenerator, method confRandom.

/**
   * When no input dir is specified, generate random data.
   */
protected static void confRandom(Job job) throws IOException {
    // from RandomWriter
    job.setInputFormatClass(RandomInputFormat.class);
    job.setMapperClass(RandomMapOutput.class);
    Configuration conf = job.getConfiguration();
    final ClusterStatus cluster = new JobClient(conf).getClusterStatus();
    int numMapsPerHost = conf.getInt(RandomTextWriter.MAPS_PER_HOST, 10);
    // Default: 1 GB of random data per map task.
    long numBytesToWritePerMap = conf.getLong(RandomTextWriter.BYTES_PER_MAP, 1 * 1024 * 1024 * 1024);
    if (numBytesToWritePerMap == 0) {
        throw new IOException("Cannot have " + RandomTextWriter.BYTES_PER_MAP + " set to 0");
    }
    // Default total: numMapsPerHost maps on every tasktracker in the cluster.
    long totalBytesToWrite = conf.getLong(RandomTextWriter.TOTAL_BYTES, numMapsPerHost * numBytesToWritePerMap * cluster.getTaskTrackers());
    int numMaps = (int) (totalBytesToWrite / numBytesToWritePerMap);
    if (numMaps == 0 && totalBytesToWrite > 0) {
        // If the requested total is smaller than one map's share, write it all from a single map.
        numMaps = 1;
        conf.setLong(RandomTextWriter.BYTES_PER_MAP, totalBytesToWrite);
    }
    conf.setInt(MRJobConfig.NUM_MAPS, numMaps);
}
Also used: Configuration (org.apache.hadoop.conf.Configuration), IOException (java.io.IOException), JobClient (org.apache.hadoop.mapred.JobClient), ClusterStatus (org.apache.hadoop.mapred.ClusterStatus)
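
To make the sizing arithmetic concrete (cluster size assumed purely for illustration): with the defaults of 10 maps per host and 1 GB per map on a cluster whose ClusterStatus reports 4 tasktrackers, totalBytesToWrite defaults to 10 * 1 GB * 4 = 40 GB, so numMaps = 40 GB / 1 GB = 40 map tasks.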

Example 34 with JobClient

Use of org.apache.hadoop.mapred.JobClient in project hadoop by apache.

Class TestEncryptedShuffle, method encryptedShuffleWithCerts.

private void encryptedShuffleWithCerts(boolean useClientCerts) throws Exception {
    try {
        Configuration conf = new Configuration();
        String keystoresDir = new File(BASEDIR).getAbsolutePath();
        String sslConfsDir = KeyStoreTestUtil.getClasspathDir(TestEncryptedShuffle.class);
        // Generate test keystores/truststores and the matching SSL client/server config files.
        KeyStoreTestUtil.setupSSLConfig(keystoresDir, sslConfsDir, conf, useClientCerts);
        conf.setBoolean(MRConfig.SHUFFLE_SSL_ENABLED_KEY, true);
        startCluster(conf);
        FileSystem fs = FileSystem.get(getJobConf());
        Path inputDir = new Path("input");
        fs.mkdirs(inputDir);
        Writer writer = new OutputStreamWriter(fs.create(new Path(inputDir, "data.txt")));
        writer.write("hello");
        writer.close();
        Path outputDir = new Path("output", "output");
        JobConf jobConf = new JobConf(getJobConf());
        jobConf.setInt("mapred.map.tasks", 1);
        // A single attempt per task so any shuffle failure fails the job immediately.
        jobConf.setInt("mapred.map.max.attempts", 1);
        jobConf.setInt("mapred.reduce.max.attempts", 1);
        jobConf.set("mapred.input.dir", inputDir.toString());
        jobConf.set("mapred.output.dir", outputDir.toString());
        JobClient jobClient = new JobClient(jobConf);
        RunningJob runJob = jobClient.submitJob(jobConf);
        runJob.waitForCompletion();
        Assert.assertTrue(runJob.isComplete());
        Assert.assertTrue(runJob.isSuccessful());
    } finally {
        stopCluster();
    }
}
Also used: Path (org.apache.hadoop.fs.Path), YarnConfiguration (org.apache.hadoop.yarn.conf.YarnConfiguration), Configuration (org.apache.hadoop.conf.Configuration), FileSystem (org.apache.hadoop.fs.FileSystem), RunningJob (org.apache.hadoop.mapred.RunningJob), OutputStreamWriter (java.io.OutputStreamWriter), File (java.io.File), JobConf (org.apache.hadoop.mapred.JobConf), JobClient (org.apache.hadoop.mapred.JobClient), FileWriter (java.io.FileWriter), Writer (java.io.Writer)
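
The switch that matters in this test is MRConfig.SHUFFLE_SSL_ENABLED_KEY, which tells reducers to fetch map output over HTTPS. A minimal client-side sketch, assuming the cluster's shuffle handlers already have keystores and SSL server configuration in place (KeyStoreTestUtil generates those for the test; in production that setup is cluster-specific):

// Request encrypted shuffle; this has no effect unless the shuffle handlers are SSL-enabled.
Configuration conf = new Configuration();
conf.setBoolean(MRConfig.SHUFFLE_SSL_ENABLED_KEY, true);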

Example 35 with JobClient

Use of org.apache.hadoop.mapred.JobClient in project hadoop by apache.

Class TestDelegationToken, method testDelegationToken.

@SuppressWarnings("deprecation")
@Test
public void testDelegationToken() throws Exception {
    final JobClient client;
    client = user1.doAs(new PrivilegedExceptionAction<JobClient>() {

        @Override
        public JobClient run() throws Exception {
            return new JobClient(cluster.createJobConf());
        }
    });
    final JobClient bobClient;
    bobClient = user2.doAs(new PrivilegedExceptionAction<JobClient>() {

        @Override
        public JobClient run() throws Exception {
            return new JobClient(cluster.createJobConf());
        }
    });
    final Token<DelegationTokenIdentifier> token = client.getDelegationToken(new Text(user1.getUserName()));
    // Deserialize the token identifier to inspect its owner and timestamps.
    DataInputBuffer inBuf = new DataInputBuffer();
    byte[] bytes = token.getIdentifier();
    inBuf.reset(bytes, bytes.length);
    DelegationTokenIdentifier ident = new DelegationTokenIdentifier();
    ident.readFields(inBuf);
    assertEquals("alice", ident.getUser().getUserName());
    long createTime = ident.getIssueDate();
    long maxTime = ident.getMaxDate();
    long currentTime = System.currentTimeMillis();
    System.out.println("create time: " + createTime);
    System.out.println("current time: " + currentTime);
    System.out.println("max time: " + maxTime);
    assertTrue("createTime < current", createTime < currentTime);
    assertTrue("current < maxTime", currentTime < maxTime);
    // renew should work as user alice
    user1.doAs(new PrivilegedExceptionAction<Void>() {

        @Override
        public Void run() throws Exception {
            client.renewDelegationToken(token);
            client.renewDelegationToken(token);
            return null;
        }
    });
    // bob should fail to renew
    user2.doAs(new PrivilegedExceptionAction<Void>() {

        @Override
        public Void run() throws Exception {
            try {
                bobClient.renewDelegationToken(token);
                Assert.fail("bob renew");
            } catch (AccessControlException ace) {
            // PASS
            }
            return null;
        }
    });
    // bob should fail to cancel
    user2.doAs(new PrivilegedExceptionAction<Void>() {

        @Override
        public Void run() throws Exception {
            try {
                bobClient.cancelDelegationToken(token);
                Assert.fail("bob cancel");
            } catch (AccessControlException ace) {
            // PASS
            }
            return null;
        }
    });
    // alice should be able to cancel but only cancel once
    user1.doAs(new PrivilegedExceptionAction<Void>() {

        @Override
        public Void run() throws Exception {
            client.cancelDelegationToken(token);
            try {
                client.cancelDelegationToken(token);
                Assert.fail("second alice cancel");
            } catch (InvalidToken it) {
            // PASS
            }
            return null;
        }
    });
}
Also used: AccessControlException (org.apache.hadoop.security.AccessControlException), Text (org.apache.hadoop.io.Text), PrivilegedExceptionAction (java.security.PrivilegedExceptionAction), JobClient (org.apache.hadoop.mapred.JobClient), DataInputBuffer (org.apache.hadoop.io.DataInputBuffer), InvalidToken (org.apache.hadoop.security.token.SecretManager.InvalidToken), Test (org.junit.Test)
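
The calls exercised above cover the whole JobClient delegation-token lifecycle. A condensed sketch, assuming client is a JobClient authenticated as the token's owner and "alice" is its designated renewer:

// Obtain, renew, and cancel a delegation token.
Token<DelegationTokenIdentifier> t = client.getDelegationToken(new Text("alice"));
long newExpiry = client.renewDelegationToken(t); // only the designated renewer may renew
client.cancelDelegationToken(t);                 // a second cancel throws InvalidToken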

Aggregations

Classes most often used together with JobClient across the indexed examples (usage counts):

JobClient (org.apache.hadoop.mapred.JobClient): 47
Path (org.apache.hadoop.fs.Path): 25
RunningJob (org.apache.hadoop.mapred.RunningJob): 20
FileSystem (org.apache.hadoop.fs.FileSystem): 18
JobConf (org.apache.hadoop.mapred.JobConf): 18
IOException (java.io.IOException): 16
Configuration (org.apache.hadoop.conf.Configuration): 16
ClusterStatus (org.apache.hadoop.mapred.ClusterStatus): 11
Date (java.util.Date): 7
Text (org.apache.hadoop.io.Text): 6
Counters (org.apache.hadoop.mapred.Counters): 6
Test (org.junit.Test): 6
DataOutputStream (java.io.DataOutputStream): 5
FileStatus (org.apache.hadoop.fs.FileStatus): 5
BufferedReader (java.io.BufferedReader): 4
InputStreamReader (java.io.InputStreamReader): 4
CompilationOpContext (org.apache.hadoop.hive.ql.CompilationOpContext): 4
Context (org.apache.hadoop.hive.ql.Context): 4
DriverContext (org.apache.hadoop.hive.ql.DriverContext): 4
FileOutputFormat (org.apache.hadoop.mapreduce.lib.output.FileOutputFormat): 4