
Example 46 with JobConf

use of org.apache.hadoop.mapred.JobConf in project hadoop by apache.

the class MergeManagerImpl method combineAndSpill.

private void combineAndSpill(RawKeyValueIterator kvIter, Counters.Counter inCounter) throws IOException {
    JobConf job = jobConf;
    // Instantiate the configured combiner and read the map-output key/value
    // types and the combiner grouping comparator from the JobConf.
    Reducer combiner = ReflectionUtils.newInstance(combinerClass, job);
    Class<K> keyClass = (Class<K>) job.getMapOutputKeyClass();
    Class<V> valClass = (Class<V>) job.getMapOutputValueClass();
    RawComparator<K> comparator = (RawComparator<K>) job.getCombinerKeyGroupingComparator();
    try {
        CombineValuesIterator values = new CombineValuesIterator(kvIter, comparator, keyClass, valClass, job, Reporter.NULL, inCounter);
        // Run the combiner over each key group; combined records go to combineCollector.
        while (values.more()) {
            combiner.reduce(values.getKey(), values, combineCollector, Reporter.NULL);
            values.nextKey();
        }
    } finally {
        combiner.close();
    }
}
Also used : CombineValuesIterator(org.apache.hadoop.mapred.Task.CombineValuesIterator) RawComparator(org.apache.hadoop.io.RawComparator) Reducer(org.apache.hadoop.mapred.Reducer) JobConf(org.apache.hadoop.mapred.JobConf)
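
For context, the classes and comparator read from the JobConf above are normally configured by the job driver before submission. A minimal driver-side sketch, assuming a hypothetical MyJob driver class and WordCountCombiner reducer, and assuming setCombinerKeyGroupingComparator is available as the setter counterpart of the getter used above:

JobConf job = new JobConf(MyJob.class);
// Read back in combineAndSpill() via getMapOutputKeyClass()/getMapOutputValueClass()
job.setMapOutputKeyClass(Text.class);
job.setMapOutputValueClass(IntWritable.class);
// The combiner class instantiated above through ReflectionUtils.newInstance()
job.setCombinerClass(WordCountCombiner.class);
// Read back via getCombinerKeyGroupingComparator(); controls how keys are grouped for the combiner
job.setCombinerKeyGroupingComparator(Text.Comparator.class);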

Example 47 with JobConf

use of org.apache.hadoop.mapred.JobConf in project hadoop by apache.

the class NNBench method runTests.

/**
   * Run the test
   * 
   * @throws IOException on error
   */
private void runTests() throws IOException {
    getConf().setLong("io.bytes.per.checksum", bytesPerChecksum);
    JobConf job = new JobConf(getConf(), NNBench.class);
    job.setJobName("NNBench-" + operation);
    FileInputFormat.setInputPaths(job, new Path(baseDir, CONTROL_DIR_NAME));
    job.setInputFormat(SequenceFileInputFormat.class);
    // Explicitly set number of max map attempts to 1.
    job.setMaxMapAttempts(1);
    // Explicitly turn off speculative execution
    job.setSpeculativeExecution(false);
    job.setMapperClass(NNBenchMapper.class);
    job.setReducerClass(NNBenchReducer.class);
    FileOutputFormat.setOutputPath(job, new Path(baseDir, OUTPUT_DIR_NAME));
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    job.setNumReduceTasks((int) numberOfReduces);
    JobClient.runJob(job);
}
Also used : Path(org.apache.hadoop.fs.Path) JobConf(org.apache.hadoop.mapred.JobConf)
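
The io.bytes.per.checksum value set on the configuration above travels with the JobConf to every task, where the old-style API reads it back in configure(JobConf). A minimal sketch of that task-side read, using a hypothetical ExampleMapper rather than the actual NNBenchMapper (assumes imports from org.apache.hadoop.mapred and org.apache.hadoop.io):

public static class ExampleMapper extends MapReduceBase implements Mapper<Text, LongWritable, Text, Text> {

    private long bytesPerChecksum;

    @Override
    public void configure(JobConf job) {
        // Properties set by the driver are visible to each map task through its JobConf.
        bytesPerChecksum = job.getLong("io.bytes.per.checksum", 512);
    }

    @Override
    public void map(Text key, LongWritable value, OutputCollector<Text, Text> output, Reporter reporter) throws IOException {
        output.collect(key, new Text("bytesPerChecksum=" + bytesPerChecksum));
    }
}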

Example 48 with JobConf

use of org.apache.hadoop.mapred.JobConf in project hadoop by apache.

the class NNBenchWithoutMR method main.

/**
   * This launches a given namenode operation (<code>-operation</code>),
   * starting at a given time (<code>-startTime</code>).  The files used
   * by the openRead, rename, and delete operations are the same files
   * created by the createWrite operation.  Typically, the program
   * would be run four times, once for each operation in this order:
   * createWrite, openRead, rename, delete.
   *
   * <pre>
   * Usage: nnbench 
   *          -operation <one of createWrite, openRead, rename, or delete>
   *          -baseDir <base output/input DFS path>
   *          -startTime <time to start, given in seconds from the epoch>
   *          -numFiles <number of files to create, read, rename, or delete>
   *          -blocksPerFile <number of blocks to create per file>
   *         [-bytesPerBlock <number of bytes to write to each block, default is 1>]
   *         [-bytesPerChecksum <value for io.bytes.per.checksum>]
   * </pre>
   *
   * @param args is an array of the program command line arguments
   * @throws IOException indicates a problem with test startup
   */
public static void main(String[] args) throws IOException {
    String version = "NameNodeBenchmark.0.3";
    System.out.println(version);
    int bytesPerChecksum = -1;
    String usage = "Usage: nnbench " + "  -operation <one of createWrite, openRead, rename, or delete>\n " + "  -baseDir <base output/input DFS path>\n " + "  -startTime <time to start, given in seconds from the epoch>\n" + "  -numFiles <number of files to create>\n " + "  -replicationFactorPerFile <Replication factor for the files, default is 1>\n" + "  -blocksPerFile <number of blocks to create per file>\n" + "  [-bytesPerBlock <number of bytes to write to each block, default is 1>]\n" + "  [-bytesPerChecksum <value for io.bytes.per.checksum>]\n" + "Note: bytesPerBlock MUST be a multiple of bytesPerChecksum\n";
    String operation = null;
    for (int i = 0; i < args.length; i++) {
        // parse command line
        if (args[i].equals("-baseDir")) {
            baseDir = new Path(args[++i]);
        } else if (args[i].equals("-numFiles")) {
            numFiles = Integer.parseInt(args[++i]);
        } else if (args[i].equals("-blocksPerFile")) {
            blocksPerFile = Integer.parseInt(args[++i]);
        } else if (args[i].equals("-bytesPerBlock")) {
            bytesPerBlock = Long.parseLong(args[++i]);
        } else if (args[i].equals("-bytesPerChecksum")) {
            bytesPerChecksum = Integer.parseInt(args[++i]);
        } else if (args[i].equals("-replicationFactorPerFile")) {
            replicationFactorPerFile = Short.parseShort(args[++i]);
        } else if (args[i].equals("-startTime")) {
            startTime = Long.parseLong(args[++i]) * 1000;
        } else if (args[i].equals("-operation")) {
            operation = args[++i];
        } else {
            System.out.println(usage);
            System.exit(-1);
        }
    }
    bytesPerFile = bytesPerBlock * blocksPerFile;
    JobConf jobConf = new JobConf(new Configuration(), NNBench.class);
    if (bytesPerChecksum < 0) {
        // if it is not set in cmdline
        bytesPerChecksum = jobConf.getInt("io.bytes.per.checksum", 512);
    }
    jobConf.set("io.bytes.per.checksum", Integer.toString(bytesPerChecksum));
    System.out.println("Inputs: ");
    System.out.println("   operation: " + operation);
    System.out.println("   baseDir: " + baseDir);
    System.out.println("   startTime: " + startTime);
    System.out.println("   numFiles: " + numFiles);
    System.out.println("   replicationFactorPerFile: " + replicationFactorPerFile);
    System.out.println("   blocksPerFile: " + blocksPerFile);
    System.out.println("   bytesPerBlock: " + bytesPerBlock);
    System.out.println("   bytesPerChecksum: " + bytesPerChecksum);
    // verify args
    if (operation == null || baseDir == null || numFiles < 1 || blocksPerFile < 1 || bytesPerBlock < 0 || bytesPerBlock % bytesPerChecksum != 0) {
        System.err.println(usage);
        System.exit(-1);
    }
    fileSys = FileSystem.get(jobConf);
    String uniqueId = java.net.InetAddress.getLocalHost().getHostName();
    taskDir = new Path(baseDir, uniqueId);
    // initialize buffer used for writing/reading file
    buffer = new byte[(int) Math.min(bytesPerFile, 32768L)];
    Date execTime;
    Date endTime;
    long duration;
    int exceptions = 0;
    // wait for coordinated start time
    barrier();
    execTime = new Date();
    System.out.println("Job started: " + startTime);
    if (operation.equals("createWrite")) {
        if (!fileSys.mkdirs(taskDir)) {
            throw new IOException("Mkdirs failed to create " + taskDir.toString());
        }
        exceptions = createWrite();
    } else if (operation.equals("openRead")) {
        exceptions = openRead();
    } else if (operation.equals("rename")) {
        exceptions = rename();
    } else if (operation.equals("delete")) {
        exceptions = delete();
    } else {
        System.err.println(usage);
        System.exit(-1);
    }
    endTime = new Date();
    System.out.println("Job ended: " + endTime);
    duration = (endTime.getTime() - execTime.getTime()) / 1000;
    System.out.println("The " + operation + " job took " + duration + " seconds.");
    System.out.println("The job recorded " + exceptions + " exceptions.");
}
Also used : Path(org.apache.hadoop.fs.Path) Configuration(org.apache.hadoop.conf.Configuration) IOException(java.io.IOException) JobConf(org.apache.hadoop.mapred.JobConf) Date(java.util.Date)
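
The barrier() call above is what coordinates the start: every process is launched with the same -startTime (seconds since the epoch, multiplied by 1000 above) and waits until that instant before issuing its namenode operations. A minimal sketch of that idea, as a hypothetical helper rather than the actual barrier() implementation:

// Hypothetical illustration of a coordinated-start wait; the real barrier() may differ.
private static void waitUntil(long startTimeMillis) {
    long delay;
    while ((delay = startTimeMillis - System.currentTimeMillis()) > 0) {
        try {
            Thread.sleep(Math.min(delay, 1000L));
        } catch (InterruptedException e) {
            Thread.currentThread().interrupt();
            return;
        }
    }
}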

Example 49 with JobConf

use of org.apache.hadoop.mapred.JobConf in project hadoop by apache.

the class LoadGeneratorMR method submitAsMapReduce.

// The following methods are only used when LoadGenerator is run as an MR job
/**
   * Based on the args, submit the LoadGenerator as an MR job.
   * The number of map tasks is numMapTasks.
   * @return exitCode for job submission
   */
private int submitAsMapReduce() {
    System.out.println("Running as a MapReduce job with " + numMapTasks + " mapTasks;  Output to file " + mrOutDir);
    Configuration conf = new Configuration(getConf());
    // First set all the args of LoadGenerator as Conf vars to pass to MR tasks
    conf.set(LG_ROOT, root.toString());
    conf.setInt(LG_MAXDELAYBETWEENOPS, maxDelayBetweenOps);
    conf.setInt(LG_NUMOFTHREADS, numOfThreads);
    //Pass Double as string
    conf.set(LG_READPR, readProbs[0] + "");
    //Pass Double as string
    conf.set(LG_WRITEPR, writeProbs[0] + "");
    // Random number generator seed, passed through to the load generator
    conf.setLong(LG_SEED, seed);
    conf.setInt(LG_NUMMAPTASKS, numMapTasks);
    if (scriptFile == null && durations[0] <= 0) {
        System.err.println("When run as a MapReduce job, elapsed Time or ScriptFile must be specified");
        System.exit(-1);
    }
    conf.setLong(LG_ELAPSEDTIME, durations[0]);
    conf.setLong(LG_STARTTIME, startTime);
    if (scriptFile != null) {
        conf.set(LG_SCRIPTFILE, scriptFile);
    }
    conf.set(LG_FLAGFILE, flagFile.toString());
    // Now set the necessary conf variables that apply to run MR itself.
    JobConf jobConf = new JobConf(conf, LoadGenerator.class);
    jobConf.setJobName("NNLoadGeneratorViaMR");
    jobConf.setNumMapTasks(numMapTasks);
    // 1 reducer to collect the results
    jobConf.setNumReduceTasks(1);
    jobConf.setOutputKeyClass(Text.class);
    jobConf.setOutputValueClass(IntWritable.class);
    jobConf.setMapperClass(MapperThatRunsNNLoadGenerator.class);
    jobConf.setReducerClass(ReducerThatCollectsLGdata.class);
    jobConf.setInputFormat(DummyInputFormat.class);
    jobConf.setOutputFormat(TextOutputFormat.class);
    // Explicitly set number of max map attempts to 1.
    jobConf.setMaxMapAttempts(1);
    // Explicitly turn off speculative execution
    jobConf.setSpeculativeExecution(false);
    // This mapReduce job has no input but has output
    FileOutputFormat.setOutputPath(jobConf, new Path(mrOutDir));
    try {
        JobClient.runJob(jobConf);
    } catch (IOException e) {
        System.err.println("Failed to run job: " + e.getMessage());
        return -1;
    }
    return 0;
}
Also used : Path(org.apache.hadoop.fs.Path) Configuration(org.apache.hadoop.conf.Configuration) IOException(java.io.IOException) JobConf(org.apache.hadoop.mapred.JobConf)
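
On the task side, the LG_* properties set above are read back from the JobConf that the framework passes to configure(). A minimal sketch of that read-back, reusing the LG_* constants referenced above; the input key/value types are placeholders and this is an illustration, not the actual MapperThatRunsNNLoadGenerator:

public static class ExampleLoadMapper extends MapReduceBase implements Mapper<LongWritable, Text, Text, IntWritable> {

    private String root;
    private int numOfThreads;
    private long elapsedTime;
    private long startTime;

    @Override
    public void configure(JobConf conf) {
        // Mirror of the conf.set(...)/setInt(...)/setLong(...) calls in submitAsMapReduce()
        root = conf.get(LG_ROOT);
        numOfThreads = conf.getInt(LG_NUMOFTHREADS, 1);
        elapsedTime = conf.getLong(LG_ELAPSEDTIME, 0);
        startTime = conf.getLong(LG_STARTTIME, 0);
    }

    @Override
    public void map(LongWritable key, Text value, OutputCollector<Text, IntWritable> output, Reporter reporter) {
        // ... run the load-generator threads with the values above and emit per-operation counts ...
    }
}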

Example 50 with JobConf

use of org.apache.hadoop.mapred.JobConf in project hadoop by apache.

the class TestHadoopArchiveLogsRunner method testHadoopArchiveLogs.

@Test(timeout = 50000)
public void testHadoopArchiveLogs() throws Exception {
    MiniDFSCluster dfsCluster = null;
    FileSystem fs = null;
    try (MiniYARNCluster yarnCluster = new MiniYARNCluster(TestHadoopArchiveLogsRunner.class.getSimpleName(), 1, 2, 1, 1)) {
        Configuration conf = new YarnConfiguration();
        conf.setBoolean(YarnConfiguration.LOG_AGGREGATION_ENABLED, true);
        conf.setBoolean(YarnConfiguration.YARN_MINICLUSTER_FIXED_PORTS, true);
        yarnCluster.init(conf);
        yarnCluster.start();
        conf = yarnCluster.getConfig();
        dfsCluster = new MiniDFSCluster.Builder(conf).numDataNodes(1).build();
        conf = new JobConf(conf);
        ApplicationId app1 = ApplicationId.newInstance(System.currentTimeMillis(), 1);
        fs = FileSystem.get(conf);
        Path remoteRootLogDir = new Path(conf.get(YarnConfiguration.NM_REMOTE_APP_LOG_DIR, YarnConfiguration.DEFAULT_NM_REMOTE_APP_LOG_DIR));
        Path workingDir = new Path(remoteRootLogDir, "archive-logs-work");
        String suffix = "logs";
        Path logDir = new Path(remoteRootLogDir, new Path(System.getProperty("user.name"), suffix));
        fs.mkdirs(logDir);
        Path app1Path = new Path(logDir, app1.toString());
        fs.mkdirs(app1Path);
        createFile(fs, new Path(app1Path, "log1"), 3);
        createFile(fs, new Path(app1Path, "log2"), 4);
        createFile(fs, new Path(app1Path, "log3"), 2);
        FileStatus[] app1Files = fs.listStatus(app1Path);
        Assert.assertEquals(3, app1Files.length);
        String[] args = new String[] { "-appId", app1.toString(), "-user", System.getProperty("user.name"), "-workingDir", workingDir.toString(), "-remoteRootLogDir", remoteRootLogDir.toString(), "-suffix", suffix };
        final HadoopArchiveLogsRunner halr = new HadoopArchiveLogsRunner(conf);
        assertEquals(0, ToolRunner.run(halr, args));
        fs = FileSystem.get(conf);
        app1Files = fs.listStatus(app1Path);
        Assert.assertEquals(1, app1Files.length);
        FileStatus harFile = app1Files[0];
        Assert.assertEquals(app1.toString() + ".har", harFile.getPath().getName());
        Path harPath = new Path("har:///" + harFile.getPath().toUri().getRawPath());
        FileStatus[] harLogs = HarFs.get(harPath.toUri(), conf).listStatus(harPath);
        Assert.assertEquals(3, harLogs.length);
        Arrays.sort(harLogs, new Comparator<FileStatus>() {

            @Override
            public int compare(FileStatus o1, FileStatus o2) {
                return o1.getPath().getName().compareTo(o2.getPath().getName());
            }
        });
        Assert.assertEquals("log1", harLogs[0].getPath().getName());
        Assert.assertEquals(3 * FILE_SIZE_INCREMENT, harLogs[0].getLen());
        Assert.assertEquals(new FsPermission(FsAction.READ_WRITE, FsAction.READ, FsAction.NONE), harLogs[0].getPermission());
        Assert.assertEquals(System.getProperty("user.name"), harLogs[0].getOwner());
        Assert.assertEquals("log2", harLogs[1].getPath().getName());
        Assert.assertEquals(4 * FILE_SIZE_INCREMENT, harLogs[1].getLen());
        Assert.assertEquals(new FsPermission(FsAction.READ_WRITE, FsAction.READ, FsAction.NONE), harLogs[1].getPermission());
        Assert.assertEquals(System.getProperty("user.name"), harLogs[1].getOwner());
        Assert.assertEquals("log3", harLogs[2].getPath().getName());
        Assert.assertEquals(2 * FILE_SIZE_INCREMENT, harLogs[2].getLen());
        Assert.assertEquals(new FsPermission(FsAction.READ_WRITE, FsAction.READ, FsAction.NONE), harLogs[2].getPermission());
        Assert.assertEquals(System.getProperty("user.name"), harLogs[2].getOwner());
        Assert.assertEquals(0, fs.listStatus(workingDir).length);
    } finally {
        if (fs != null) {
            fs.close();
        }
        if (dfsCluster != null) {
            dfsCluster.shutdown();
        }
    }
}
Also used : Path(org.apache.hadoop.fs.Path) MiniDFSCluster(org.apache.hadoop.hdfs.MiniDFSCluster) FileStatus(org.apache.hadoop.fs.FileStatus) YarnConfiguration(org.apache.hadoop.yarn.conf.YarnConfiguration) Configuration(org.apache.hadoop.conf.Configuration) YarnConfiguration(org.apache.hadoop.yarn.conf.YarnConfiguration) FileSystem(org.apache.hadoop.fs.FileSystem) FsPermission(org.apache.hadoop.fs.permission.FsPermission) MiniYARNCluster(org.apache.hadoop.yarn.server.MiniYARNCluster) ApplicationId(org.apache.hadoop.yarn.api.records.ApplicationId) JobConf(org.apache.hadoop.mapred.JobConf) Test(org.junit.Test)
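
The createFile helper used above is not shown in this snippet; a minimal sketch of what it plausibly does, assuming FILE_SIZE_INCREMENT is the per-unit byte count defined by the test and org.apache.hadoop.fs.FSDataOutputStream is imported (hypothetical reconstruction, not the actual helper):

private static void createFile(FileSystem fs, Path path, int multiple) throws IOException {
    // Write multiple * FILE_SIZE_INCREMENT bytes so the later getLen() assertions
    // (3, 4, and 2 times FILE_SIZE_INCREMENT) line up with the created files.
    byte[] payload = new byte[FILE_SIZE_INCREMENT * multiple];
    try (FSDataOutputStream out = fs.create(path)) {
        out.write(payload);
    }
}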

Aggregations

JobConf (org.apache.hadoop.mapred.JobConf): 1037
Path (org.apache.hadoop.fs.Path): 510
Test (org.junit.Test): 317
FileSystem (org.apache.hadoop.fs.FileSystem): 264
IOException (java.io.IOException): 204
Configuration (org.apache.hadoop.conf.Configuration): 163
InputSplit (org.apache.hadoop.mapred.InputSplit): 110
ArrayList (java.util.ArrayList): 89
Text (org.apache.hadoop.io.Text): 82
File (java.io.File): 81
RunningJob (org.apache.hadoop.mapred.RunningJob): 67
Properties (java.util.Properties): 58
List (java.util.List): 49
HashMap (java.util.HashMap): 47
DMLRuntimeException (org.apache.sysml.runtime.DMLRuntimeException): 47
SequenceFile (org.apache.hadoop.io.SequenceFile): 45
TextInputFormat (org.apache.hadoop.mapred.TextInputFormat): 44
Map (java.util.Map): 42
Job (org.apache.hadoop.mapreduce.Job): 42
LongWritable (org.apache.hadoop.io.LongWritable): 41