
Example 16 with JobConf

Use of org.apache.hadoop.mapred.JobConf in the Apache Hadoop project.

From the class TestMiniMRProxyUser, the method setUp:

@Before
public void setUp() throws Exception {
    // The mini clusters require hadoop.log.dir; default it to /tmp if unset.
    if (System.getProperty("hadoop.log.dir") == null) {
        System.setProperty("hadoop.log.dir", "/tmp");
    }
    int taskTrackers = 2;
    int dataNodes = 2;
    String proxyUser = System.getProperty("user.name");
    String proxyGroup = "g";
    // Hosts the proxy user may impersonate from: loopback plus every
    // address the local hostname resolves to.
    StringBuilder sb = new StringBuilder();
    sb.append("127.0.0.1,localhost");
    for (InetAddress i : InetAddress.getAllByName(InetAddress.getLocalHost().getHostName())) {
        sb.append(",").append(i.getCanonicalHostName());
    }
    JobConf conf = new JobConf();
    conf.set("dfs.block.access.token.enable", "false");
    conf.set("dfs.permissions", "true");
    conf.set("hadoop.security.authentication", "simple");
    conf.set("hadoop.proxyuser." + proxyUser + ".hosts", sb.toString());
    conf.set("hadoop.proxyuser." + proxyUser + ".groups", proxyGroup);
    String[] userGroups = new String[] { proxyGroup };
    UserGroupInformation.createUserForTesting(proxyUser, userGroups);
    UserGroupInformation.createUserForTesting("u1", userGroups);
    UserGroupInformation.createUserForTesting("u2", new String[] { "gg" });
    dfsCluster = new MiniDFSCluster.Builder(conf).numDataNodes(dataNodes).build();
    FileSystem fileSystem = dfsCluster.getFileSystem();
    // Staging dirs /tmp and /user are world-writable; the mapred system dir is owner-only.
    fileSystem.mkdirs(new Path("/tmp"));
    fileSystem.mkdirs(new Path("/user"));
    fileSystem.mkdirs(new Path("/hadoop/mapred/system"));
    fileSystem.setPermission(new Path("/tmp"), FsPermission.valueOf("-rwxrwxrwx"));
    fileSystem.setPermission(new Path("/user"), FsPermission.valueOf("-rwxrwxrwx"));
    fileSystem.setPermission(new Path("/hadoop/mapred/system"), FsPermission.valueOf("-rwx------"));
    String nnURI = fileSystem.getUri().toString();
    int numDirs = 1;
    String[] racks = null;
    String[] hosts = null;
    // Ports 0, 0 let the JobTracker and TaskTrackers pick free ports.
    mrCluster = new MiniMRCluster(0, 0, taskTrackers, nnURI, numDirs, racks, hosts, null, conf);
    // Load the hadoop.proxyuser.* settings made above into ProxyUsers.
    ProxyUsers.refreshSuperUserGroupsConfiguration(conf);
}
Also used: Path (org.apache.hadoop.fs.Path), MiniDFSCluster (org.apache.hadoop.hdfs.MiniDFSCluster), FileSystem (org.apache.hadoop.fs.FileSystem), InetAddress (java.net.InetAddress), JobConf (org.apache.hadoop.mapred.JobConf), MiniMRCluster (org.apache.hadoop.mapred.MiniMRCluster), Before (org.junit.Before)
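
A setUp like this needs a matching shutdown. A minimal @After sketch, assuming the dfsCluster and mrCluster fields the test class declares:

@After
public void tearDown() throws Exception {
    // Stop the MR cluster first so no running jobs touch HDFS mid-shutdown.
    if (mrCluster != null) {
        mrCluster.shutdown();
    }
    if (dfsCluster != null) {
        dfsCluster.shutdown();
    }
}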

Example 17 with JobConf

Use of org.apache.hadoop.mapred.JobConf in the Apache Hadoop project.

From the class TestNonExistentJob, the method setUp (the same mini-cluster bootstrap as Example 16, minus the proxy-user settings):

@Before
public void setUp() throws Exception {
    if (System.getProperty("hadoop.log.dir") == null) {
        System.setProperty("hadoop.log.dir", "/tmp");
    }
    int taskTrackers = 2;
    int dataNodes = 2;
    // Note: these proxy-user values are computed but never applied to conf
    // below; TestMiniMRProxyUser (Example 16) is the test that sets the
    // hadoop.proxyuser.* keys.
    String proxyUser = System.getProperty("user.name");
    String proxyGroup = "g";
    StringBuilder sb = new StringBuilder();
    sb.append("127.0.0.1,localhost");
    for (InetAddress i : InetAddress.getAllByName(InetAddress.getLocalHost().getHostName())) {
        sb.append(",").append(i.getCanonicalHostName());
    }
    JobConf conf = new JobConf();
    conf.set("dfs.block.access.token.enable", "false");
    conf.set("dfs.permissions", "true");
    conf.set("hadoop.security.authentication", "simple");
    dfsCluster = new MiniDFSCluster.Builder(conf).numDataNodes(dataNodes).build();
    FileSystem fileSystem = dfsCluster.getFileSystem();
    fileSystem.mkdirs(new Path("/tmp"));
    fileSystem.mkdirs(new Path("/user"));
    fileSystem.mkdirs(new Path("/hadoop/mapred/system"));
    fileSystem.setPermission(new Path("/tmp"), FsPermission.valueOf("-rwxrwxrwx"));
    fileSystem.setPermission(new Path("/user"), FsPermission.valueOf("-rwxrwxrwx"));
    fileSystem.setPermission(new Path("/hadoop/mapred/system"), FsPermission.valueOf("-rwx------"));
    String nnURI = fileSystem.getUri().toString();
    int numDirs = 1;
    String[] racks = null;
    String[] hosts = null;
    mrCluster = new MiniMRCluster(0, 0, taskTrackers, nnURI, numDirs, racks, hosts, null, conf);
    ProxyUsers.refreshSuperUserGroupsConfiguration(conf);
}
Also used: Path (org.apache.hadoop.fs.Path), MiniDFSCluster (org.apache.hadoop.hdfs.MiniDFSCluster), FileSystem (org.apache.hadoop.fs.FileSystem), InetAddress (java.net.InetAddress), JobConf (org.apache.hadoop.mapred.JobConf), MiniMRCluster (org.apache.hadoop.mapred.MiniMRCluster), Before (org.junit.Before)
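
The test body itself is not part of this excerpt; going by the class name, it asks the freshly started cluster for a job that was never submitted. A minimal sketch of that idea (the method name and the probe job ID are assumptions, not the class's actual code):

@Test
public void testGetNonExistentJob() throws Exception {
    // Ask the MiniMRCluster for a job ID that was never submitted.
    JobClient client = new JobClient(mrCluster.createJobConf());
    RunningJob runJob = client.getJob(JobID.forName("job_0_0"));
    // A missing job is reported as null rather than as an exception.
    Assert.assertNull(runJob);
}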

Example 18 with JobConf

Use of org.apache.hadoop.mapred.JobConf in the Apache Hadoop project.

From the class TestMRAppWithCombiner, the method testCombinerShouldUpdateTheReporter:

@Test
public void testCombinerShouldUpdateTheReporter() throws Exception {
    JobConf conf = new JobConf(mrCluster.getConfig());
    int numMaps = 5;
    int numReds = 2;
    Path in = new Path(mrCluster.getTestWorkDir().getAbsolutePath(), "testCombinerShouldUpdateTheReporter-in");
    Path out = new Path(mrCluster.getTestWorkDir().getAbsolutePath(), "testCombinerShouldUpdateTheReporter-out");
    createInputOutPutFolder(in, out, numMaps);
    conf.setJobName("test-job-with-combiner");
    conf.setMapperClass(IdentityMapper.class);
    conf.setCombinerClass(MyCombinerToCheckReporter.class);
    // conf.setJarByClass(MyCombinerToCheckReporter.class);
    conf.setReducerClass(IdentityReducer.class);
    // Ship the test app jar so the job classes reach the task classpath.
    DistributedCache.addFileToClassPath(TestMRJobs.APP_JAR, conf);
    conf.setOutputCommitter(CustomOutputCommitter.class);
    conf.setInputFormat(TextInputFormat.class);
    conf.setOutputKeyClass(LongWritable.class);
    conf.setOutputValueClass(Text.class);
    FileInputFormat.setInputPaths(conf, in);
    FileOutputFormat.setOutputPath(conf, out);
    conf.setNumMapTasks(numMaps);
    conf.setNumReduceTasks(numReds);
    runJob(conf);
}
Also used: Path (org.apache.hadoop.fs.Path), JobConf (org.apache.hadoop.mapred.JobConf), Test (org.junit.Test)
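
MyCombinerToCheckReporter is defined elsewhere in the test class; the point of the test is that the framework hands the combiner a live Reporter rather than the no-op Reporter.NULL. A minimal sketch of such a combiner, assuming the usual org.apache.hadoop.mapred imports (the exact body in the Hadoop source may differ):

static class MyCombinerToCheckReporter<K, V> extends IdentityReducer<K, V> {
    @Override
    public void reduce(K key, Iterator<V> values, OutputCollector<K, V> output,
            Reporter reporter) throws IOException {
        // The combiner must be given a real Reporter so it can report progress;
        // receiving Reporter.NULL would mean the framework wired it up wrongly.
        if (reporter == Reporter.NULL) {
            Assert.fail("Combiner got Reporter.NULL instead of a live Reporter");
        }
    }
}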

Example 19 with JobConf

Use of org.apache.hadoop.mapred.JobConf in the Apache Hadoop project.

From the class TestMRJobs, the method testThreadDumpOnTaskTimeout:

@Test(timeout = 120000)
public void testThreadDumpOnTaskTimeout() throws IOException, InterruptedException, ClassNotFoundException {
    if (!(new File(MiniMRYarnCluster.APPJAR)).exists()) {
        LOG.info("MRAppJar " + MiniMRYarnCluster.APPJAR + " not found. Not running test.");
        return;
    }
    final SleepJob sleepJob = new SleepJob();
    final JobConf sleepConf = new JobConf(mrCluster.getConfig());
    // Kill a task attempt after 3 seconds without progress, and give it no retries.
    sleepConf.setLong(MRJobConfig.TASK_TIMEOUT, 3 * 1000L);
    sleepConf.setInt(MRJobConfig.MAP_MAX_ATTEMPTS, 1);
    sleepJob.setConf(sleepConf);
    if (this instanceof TestUberAM) {
        sleepConf.setInt(MRJobConfig.MR_AM_TO_RM_HEARTBEAT_INTERVAL_MS, 30 * 1000);
    }
    // The map sleeps for 10 minutes, far longer than the 3-second task
    // timeout, so the attempt is killed and a thread dump is requested.
    final Job job = sleepJob.createJob(1, 0, 10 * 60 * 1000L, 1, 0L, 0);
    job.setJarByClass(SleepJob.class);
    // The AppMaster jar itself.
    job.addFileToClassPath(APP_JAR);
    job.waitForCompletion(true);
    final JobId jobId = TypeConverter.toYarn(job.getJobID());
    final ApplicationId appID = jobId.getAppId();
    int pollElapsed = 0;
    while (true) {
        Thread.sleep(1000);
        pollElapsed += 1000;
        if (TERMINAL_RM_APP_STATES.contains(mrCluster.getResourceManager().getRMContext().getRMApps().get(appID).getState())) {
            break;
        }
        if (pollElapsed >= 60000) {
            LOG.warn("application did not reach terminal state within 60 seconds");
            break;
        }
    }
    // Job finished; verify the logs.
    final String appIdStr = appID.toString();
    // application_<ts>_<id> becomes the glob container_<ts>_<id>_*_* matching all containers.
    final String appIdSuffix = appIdStr.substring("application_".length());
    final String containerGlob = "container_" + appIdSuffix + "_*_*";
    final String syslogGlob = appIdStr + Path.SEPARATOR + containerGlob + Path.SEPARATOR + TaskLog.LogName.SYSLOG;
    int numAppMasters = 0;
    int numMapTasks = 0;
    for (int i = 0; i < NUM_NODE_MGRS; i++) {
        final Configuration nmConf = mrCluster.getNodeManager(i).getConfig();
        for (String logDir : nmConf.getTrimmedStrings(YarnConfiguration.NM_LOG_DIRS)) {
            final Path absSyslogGlob = new Path(logDir + Path.SEPARATOR + syslogGlob);
            LOG.info("Checking for glob: " + absSyslogGlob);
            for (FileStatus syslog : localFs.globStatus(absSyslogGlob)) {
                boolean foundAppMaster = false;
                boolean foundThreadDump = false;
                // Determine the container type
                final BufferedReader syslogReader = new BufferedReader(new InputStreamReader(localFs.open(syslog.getPath())));
                try {
                    for (String line; (line = syslogReader.readLine()) != null; ) {
                        if (line.contains(MRAppMaster.class.getName())) {
                            foundAppMaster = true;
                            break;
                        }
                    }
                } finally {
                    syslogReader.close();
                }
                // Check for thread dump in stdout
                final Path stdoutPath = new Path(syslog.getPath().getParent(), TaskLog.LogName.STDOUT.toString());
                final BufferedReader stdoutReader = new BufferedReader(new InputStreamReader(localFs.open(stdoutPath)));
                try {
                    for (String line; (line = stdoutReader.readLine()) != null; ) {
                        if (line.contains("Full thread dump")) {
                            foundThreadDump = true;
                            break;
                        }
                    }
                } finally {
                    stdoutReader.close();
                }
                if (foundAppMaster) {
                    numAppMasters++;
                    if (this instanceof TestUberAM) {
                        Assert.assertTrue("No thread dump", foundThreadDump);
                    } else {
                        Assert.assertFalse("Unexpected thread dump", foundThreadDump);
                    }
                } else {
                    numMapTasks++;
                    Assert.assertTrue("No thread dump", foundThreadDump);
                }
            }
        }
    }
    // Make sure we checked a non-empty set of logs.
    Assert.assertEquals("No AppMaster log found!", 1, numAppMasters);
    if (sleepConf.getBoolean(MRJobConfig.JOB_UBERTASK_ENABLE, false)) {
        // assertEquals rather than assertSame: assertSame compares boxed
        // Integer references and only passes by accident for small values.
        Assert.assertEquals("MapTask log with uber found!", 0, numMapTasks);
    } else {
        Assert.assertEquals("No MapTask log found!", 1, numMapTasks);
    }
}
Also used: Path (org.apache.hadoop.fs.Path), MRAppMaster (org.apache.hadoop.mapreduce.v2.app.MRAppMaster), FileStatus (org.apache.hadoop.fs.FileStatus), Configuration (org.apache.hadoop.conf.Configuration), YarnConfiguration (org.apache.hadoop.yarn.conf.YarnConfiguration), InputStreamReader (java.io.InputStreamReader), BufferedReader (java.io.BufferedReader), SleepJob (org.apache.hadoop.mapreduce.SleepJob), RunningJob (org.apache.hadoop.mapred.RunningJob), Job (org.apache.hadoop.mapreduce.Job), RandomTextWriterJob (org.apache.hadoop.RandomTextWriterJob), ApplicationId (org.apache.hadoop.yarn.api.records.ApplicationId), File (java.io.File), JobConf (org.apache.hadoop.mapred.JobConf), JobId (org.apache.hadoop.mapreduce.v2.api.records.JobId), Test (org.junit.Test)
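
The two MRJobConfig constants the test sets correspond to plain job properties, so the same behavior can be configured on any JobConf by key (a sketch; the string keys shown are the standard Hadoop 2.x property names behind those constants):

JobConf conf = new JobConf();
// MRJobConfig.TASK_TIMEOUT: kill a task attempt after 3 seconds without progress.
conf.setLong("mapreduce.task.timeout", 3 * 1000L);
// MRJobConfig.MAP_MAX_ATTEMPTS: give the killed attempt no retries.
conf.setInt("mapreduce.map.maxattempts", 1);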

Example 20 with JobConf

Use of org.apache.hadoop.mapred.JobConf in the Apache Hadoop project.

From the class ExternalMapReduce, the method run:

public int run(String[] argv) throws IOException {
    if (argv.length < 2) {
        System.out.println("ExternalMapReduce <input> <output>");
        return -1;
    }
    Path outDir = new Path(argv[1]);
    Path input = new Path(argv[0]);
    JobConf testConf = new JobConf(getConf(), ExternalMapReduce.class);
    // Verify that a class shipped via -libjars is visible on the classpath.
    try {
        testConf.getClassByName("testjar.ClassWordCount");
    } catch (ClassNotFoundException e) {
        System.out.println("Could not find class from libjar");
        return -1;
    }
    testConf.setJobName("external job");
    FileInputFormat.setInputPaths(testConf, input);
    FileOutputFormat.setOutputPath(testConf, outDir);
    testConf.setMapperClass(MapClass.class);
    testConf.setReducerClass(Reduce.class);
    testConf.setNumReduceTasks(1);
    JobClient.runJob(testConf);
    return 0;
}
Also used: Path (org.apache.hadoop.fs.Path), JobConf (org.apache.hadoop.mapred.JobConf)
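
The getConf() call implies the class extends Configured and implements Tool, so it is meant to be launched through ToolRunner, which is also what parses the -libjars option that the getClassByName check depends on. A minimal sketch of the matching main method:

public static void main(String[] args) throws Exception {
    // ToolRunner strips generic options (-libjars, -conf, -D ...) before
    // handing the remaining arguments to run(String[]).
    int res = ToolRunner.run(new Configuration(), new ExternalMapReduce(), args);
    System.exit(res);
}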

Aggregations

JobConf (org.apache.hadoop.mapred.JobConf): 1037
Path (org.apache.hadoop.fs.Path): 510
Test (org.junit.Test): 317
FileSystem (org.apache.hadoop.fs.FileSystem): 264
IOException (java.io.IOException): 204
Configuration (org.apache.hadoop.conf.Configuration): 163
InputSplit (org.apache.hadoop.mapred.InputSplit): 110
ArrayList (java.util.ArrayList): 89
Text (org.apache.hadoop.io.Text): 82
File (java.io.File): 81
RunningJob (org.apache.hadoop.mapred.RunningJob): 67
Properties (java.util.Properties): 58
List (java.util.List): 49
HashMap (java.util.HashMap): 47
DMLRuntimeException (org.apache.sysml.runtime.DMLRuntimeException): 47
SequenceFile (org.apache.hadoop.io.SequenceFile): 45
TextInputFormat (org.apache.hadoop.mapred.TextInputFormat): 44
Map (java.util.Map): 42
Job (org.apache.hadoop.mapreduce.Job): 42
LongWritable (org.apache.hadoop.io.LongWritable): 41