Search in sources :

Example 6 with MiniMRCluster

use of org.apache.hadoop.mapred.MiniMRCluster in project hadoop by apache.

From the class TestSymLink, method testSymLink.

@Test(timeout = 120000)
public void testSymLink() throws Exception {
    boolean mayExit = false;
    MiniMRCluster mr = null;
    MiniDFSCluster dfs = null;
    try {
        Configuration conf = new Configuration();
        dfs = new MiniDFSCluster.Builder(conf).build();
        FileSystem fileSys = dfs.getFileSystem();
        String namenode = fileSys.getUri().toString();
        mr = new MiniMRCluster(1, namenode, 3);
        // Forward every property of the mini cluster's generated JobConf to
        // the streaming job via -jobconf so the job targets this cluster.
        List<String> args = new ArrayList<String>();
        for (Map.Entry<String, String> entry : mr.createJobConf()) {
            args.add("-jobconf");
            args.add(entry.getKey() + "=" + entry.getValue());
        }
        // During tests, the default Configuration will use a local mapred
        // So don't specify -config or -cluster
        String[] argv = new String[] { "-input", INPUT_FILE, "-output", OUTPUT_DIR, "-mapper", map, "-reducer", reduce, "-jobconf", "stream.tmpdir=" + System.getProperty("test.build.data", "/tmp"), "-jobconf", JobConf.MAPRED_MAP_TASK_JAVA_OPTS + "=" + "-Dcontrib.name=" + System.getProperty("contrib.name") + " " + "-Dbuild.test=" + System.getProperty("build.test") + " " + conf.get(JobConf.MAPRED_MAP_TASK_JAVA_OPTS, conf.get(JobConf.MAPRED_TASK_JAVA_OPTS, "")), "-jobconf", JobConf.MAPRED_REDUCE_TASK_JAVA_OPTS + "=" + "-Dcontrib.name=" + System.getProperty("contrib.name") + " " + "-Dbuild.test=" + System.getProperty("build.test") + " " + conf.get(JobConf.MAPRED_REDUCE_TASK_JAVA_OPTS, conf.get(JobConf.MAPRED_TASK_JAVA_OPTS, "")), "-cacheFile", fileSys.getUri() + CACHE_FILE + "#testlink", "-jobconf", "mapred.jar=" + TestStreaming.STREAMING_JAR };
        for (String arg : argv) {
            args.add(arg);
        }
        argv = args.toArray(new String[args.size()]);
        fileSys.delete(new Path(OUTPUT_DIR), true);
        // Write the job input and the file that gets symlinked into the task
        // working directory via "-cacheFile ...#testlink". try-with-resources
        // guarantees the streams are closed even if a write fails.
        try (DataOutputStream input = fileSys.create(new Path(INPUT_FILE))) {
            input.writeBytes(mapString);
        }
        try (DataOutputStream cache = fileSys.create(new Path(CACHE_FILE))) {
            cache.writeBytes(cacheString);
        }
        job = new StreamJob(argv, mayExit);
        job.go();
        String line = null;
        Path[] fileList = FileUtil.stat2Paths(fileSys.listStatus(new Path(OUTPUT_DIR), new Utils.OutputFileUtils.OutputFilesFilter()));
        for (int i = 0; i < fileList.length; i++) {
            System.out.println(fileList[i].toString());
            // Close each reader; the original leaked one per output file.
            try (BufferedReader bread = new BufferedReader(new InputStreamReader(fileSys.open(fileList[i])))) {
                line = bread.readLine();
            }
            System.out.println(line);
        }
        // The mapper emits the cache-file contents; TextOutputFormat appends
        // a tab separator before the (empty) value.
        assertEquals(cacheString + "\t", line);
    } finally {
        // Shut the MR cluster down before the DFS it depends on.
        if (mr != null) {
            mr.shutdown();
        }
        if (dfs != null) {
            dfs.shutdown();
        }
    }
}
Also used : Path(org.apache.hadoop.fs.Path) MiniDFSCluster(org.apache.hadoop.hdfs.MiniDFSCluster) Configuration(org.apache.hadoop.conf.Configuration) InputStreamReader(java.io.InputStreamReader) DataOutputStream(java.io.DataOutputStream) ArrayList(java.util.ArrayList) MiniMRCluster(org.apache.hadoop.mapred.MiniMRCluster) Utils(org.apache.hadoop.mapred.Utils) FileSystem(org.apache.hadoop.fs.FileSystem) BufferedReader(java.io.BufferedReader) Map(java.util.Map) Test(org.junit.Test)

Example 7 with MiniMRCluster

use of org.apache.hadoop.mapred.MiniMRCluster in project hive by apache.

From the class ManyMiniCluster, method setupMRCluster.

/**
 * Brings up the mini MapReduce cluster on freshly allocated ports and
 * replaces {@code jobConf} with the configuration the running cluster
 * generates. Wraps any startup {@link IOException} in an
 * {@link IllegalStateException}.
 */
private void setupMRCluster() {
    try {
        final int jobTrackerPort = findFreePort();
        final int taskTrackerPort = findFreePort();
        if (jobConf == null) {
            jobConf = new JobConf();
        }
        // Single-node test cluster: no need to replicate submitted job files.
        jobConf.setInt("mapred.submit.replication", 1);
        jobConf.set("yarn.scheduler.capacity.root.queues", "default");
        jobConf.set("yarn.scheduler.capacity.root.default.capacity", "100");
        // conf.set("hadoop.job.history.location",new File(workDir).getAbsolutePath()+"/history");
        // Keep task logs under the test working directory.
        System.setProperty("hadoop.log.dir", new File(workDir, "/logs").getAbsolutePath());
        final String fsUri = getFileSystem().getUri().toString();
        mrCluster = new MiniMRCluster(jobTrackerPort, taskTrackerPort, numTaskTrackers, fsUri, numTaskTrackers, null, null, null, jobConf);
        // From here on, use the conf the cluster actually runs with.
        jobConf = mrCluster.createJobConf();
    } catch (IOException ioe) {
        throw new IllegalStateException("Failed to Setup MR Cluster", ioe);
    }
}
Also used : IOException(java.io.IOException) JobConf(org.apache.hadoop.mapred.JobConf) File(java.io.File) MiniMRCluster(org.apache.hadoop.mapred.MiniMRCluster)

Example 8 with MiniMRCluster

use of org.apache.hadoop.mapred.MiniMRCluster in project hbase by apache.

From the class HBaseTestingUtility, method startMiniMapReduceCluster.

/**
   * Starts a <code>MiniMRCluster</code>. Call {@link #setFileSystemURI(String)} to use a different
   * filesystem.
   * @param servers  The number of <code>TaskTracker</code>'s to start.
   * @throws IOException When starting the cluster fails.
   * @throws IllegalStateException When a MiniMRCluster is already running.
   */
private void startMiniMapReduceCluster(final int servers) throws IOException {
    if (mrCluster != null) {
        throw new IllegalStateException("MiniMRCluster is already running");
    }
    LOG.info("Starting mini mapreduce cluster...");
    setupClusterTestDir();
    createDirsAndSetProperties();
    forceChangeTaskLogDir();
    //// hadoop2 specific settings
    // Tests were failing because this process used 6GB of virtual memory and was getting killed.
    // we up the VM usable so that processes don't get killed.
    conf.setFloat("yarn.nodemanager.vmem-pmem-ratio", 8.0f);
    // Tests were failing due to MAPREDUCE-4880 / MAPREDUCE-4607 against hadoop 2.0.2-alpha and
    // this avoids the problem by disabling speculative task execution in tests.
    conf.setBoolean("mapreduce.map.speculative", false);
    conf.setBoolean("mapreduce.reduce.speculative", false);
    ////
    // Allow the user to override FS URI for this map-reduce cluster to use.
    mrCluster = new MiniMRCluster(servers, FS_URI != null ? FS_URI : FileSystem.get(conf).getUri().toString(), 1, null, null, new JobConf(this.conf));
    JobConf jobConf = MapreduceTestingShim.getJobConf(mrCluster);
    if (jobConf == null) {
        jobConf = mrCluster.createJobConf();
    }
    jobConf.set("mapreduce.cluster.local.dir", //Hadoop MiniMR overwrites this while it should not
    conf.get("mapreduce.cluster.local.dir"));
    LOG.info("Mini mapreduce cluster started");
    // In hadoop2, YARN/MR2 starts a mini cluster with its own conf instance and updates settings.
    // Our HBase MR jobs need several of these settings in order to properly run.  So we copy the
    // necessary config properties here.  YARN-129 required adding a few properties.
    conf.set("mapreduce.jobtracker.address", jobConf.get("mapreduce.jobtracker.address"));
    // this for mrv2 support; mr1 ignores this
    conf.set("mapreduce.framework.name", "yarn");
    conf.setBoolean("yarn.is.minicluster", true);
    // Copy each address the mini cluster chose back into our conf, skipping
    // any the running cluster did not set. (Replaces five copy-pasted
    // get/null-check/set stanzas with one data-driven loop.)
    for (String key : new String[] {
            "yarn.resourcemanager.address",
            "mapreduce.jobhistory.address",
            "yarn.resourcemanager.scheduler.address",
            "mapreduce.jobhistory.webapp.address",
            "yarn.resourcemanager.webapp.address" }) {
        String value = jobConf.get(key);
        if (value != null) {
            conf.set(key, value);
        }
    }
}
Also used : MiniMRCluster(org.apache.hadoop.mapred.MiniMRCluster) JobConf(org.apache.hadoop.mapred.JobConf)

Example 9 with MiniMRCluster

use of org.apache.hadoop.mapred.MiniMRCluster in project hadoop by apache.

From the class TestPipes, method testPipes.

@Test
public void testPipes() throws IOException {
    if (System.getProperty("compile.c++") == null) {
        LOG.info("compile.c++ is not defined, so skipping TestPipes");
        return;
    }
    MiniDFSCluster dfs = null;
    MiniMRCluster mr = null;
    Path inputPath = new Path("testing/in");
    Path outputPath = new Path("testing/out");
    try {
        final int numWorkers = 2;
        Configuration conf = new Configuration();
        dfs = new MiniDFSCluster.Builder(conf).numDataNodes(numWorkers).build();
        mr = new MiniMRCluster(numWorkers, dfs.getFileSystem().getUri().toString(), 1);
        writeInputFile(dfs.getFileSystem(), inputPath);
        runProgram(mr, dfs, wordCountSimple, inputPath, outputPath, 3, 2, twoSplitOutput, null);
        cleanup(dfs.getFileSystem(), outputPath);
        runProgram(mr, dfs, wordCountSimple, inputPath, outputPath, 3, 0, noSortOutput, null);
        cleanup(dfs.getFileSystem(), outputPath);
        runProgram(mr, dfs, wordCountPart, inputPath, outputPath, 3, 2, fixedPartitionOutput, null);
        runNonPipedProgram(mr, dfs, wordCountNoPipes, null);
        mr.waitUntilIdle();
    } finally {
        // Null-check before shutdown: if cluster startup throws, the
        // unconditional mr.shutdown() would raise an NPE here and mask the
        // original exception.
        if (mr != null) {
            mr.shutdown();
        }
        if (dfs != null) {
            dfs.shutdown();
        }
    }
}
Also used : Path(org.apache.hadoop.fs.Path) MiniDFSCluster(org.apache.hadoop.hdfs.MiniDFSCluster) Configuration(org.apache.hadoop.conf.Configuration) MiniMRCluster(org.apache.hadoop.mapred.MiniMRCluster) Test(org.junit.Test)

Example 10 with MiniMRCluster

use of org.apache.hadoop.mapred.MiniMRCluster in project hadoop by apache.

From the class TestMapReduceLazyOutput, method testLazyOutput.

@Test
public void testLazyOutput() throws Exception {
    MiniDFSCluster dfs = null;
    MiniMRCluster mr = null;
    FileSystem fileSys = null;
    try {
        Configuration conf = new Configuration();
        // Start the mini-MR and mini-DFS clusters
        dfs = new MiniDFSCluster.Builder(conf).numDataNodes(NUM_HADOOP_WORKERS).build();
        fileSys = dfs.getFileSystem();
        mr = new MiniMRCluster(NUM_HADOOP_WORKERS, fileSys.getUri().toString(), 1);
        int numReducers = 2;
        int numMappers = NUM_HADOOP_WORKERS * NUM_MAPS_PER_NODE;
        createInput(fileSys, numMappers);
        Path output1 = new Path("/testlazy/output1");
        // Test 1. Lazy output on: the reducer that produced no records must
        // not create an output file, so one fewer file than reducers.
        runTestLazyOutput(mr.createJobConf(), output1, numReducers, true);
        Path[] fileList = FileUtil.stat2Paths(fileSys.listStatus(output1, new Utils.OutputFileUtils.OutputFilesFilter()));
        for (int i = 0; i < fileList.length; ++i) {
            System.out.println("Test1 File list[" + i + "]" + ": " + fileList[i]);
        }
        // Messages on the assertions so a failure reports which case broke
        // (bare assertTrue(a == b) yields no diagnostic output).
        assertTrue("Test1: expected " + (numReducers - 1) + " output files, found " + fileList.length, fileList.length == (numReducers - 1));
        // Test 2. 0 Reducers, maps directly write to the output files
        Path output2 = new Path("/testlazy/output2");
        runTestLazyOutput(mr.createJobConf(), output2, 0, true);
        fileList = FileUtil.stat2Paths(fileSys.listStatus(output2, new Utils.OutputFileUtils.OutputFilesFilter()));
        for (int i = 0; i < fileList.length; ++i) {
            System.out.println("Test2 File list[" + i + "]" + ": " + fileList[i]);
        }
        assertTrue("Test2: expected " + (numMappers - 1) + " output files, found " + fileList.length, fileList.length == numMappers - 1);
        // Test 3. 0 Reducers, but flag is turned off
        Path output3 = new Path("/testlazy/output3");
        runTestLazyOutput(mr.createJobConf(), output3, 0, false);
        fileList = FileUtil.stat2Paths(fileSys.listStatus(output3, new Utils.OutputFileUtils.OutputFilesFilter()));
        for (int i = 0; i < fileList.length; ++i) {
            System.out.println("Test3 File list[" + i + "]" + ": " + fileList[i]);
        }
        assertTrue("Test3: expected " + numMappers + " output files, found " + fileList.length, fileList.length == numMappers);
    } finally {
        // Shut the MR cluster down before the DFS it depends on.
        if (mr != null) {
            mr.shutdown();
        }
        if (dfs != null) {
            dfs.shutdown();
        }
    }
}
Also used : Path(org.apache.hadoop.fs.Path) MiniDFSCluster(org.apache.hadoop.hdfs.MiniDFSCluster) Configuration(org.apache.hadoop.conf.Configuration) Utils(org.apache.hadoop.mapred.Utils) FileSystem(org.apache.hadoop.fs.FileSystem) MiniMRCluster(org.apache.hadoop.mapred.MiniMRCluster) Test(org.junit.Test)

Aggregations

MiniMRCluster (org.apache.hadoop.mapred.MiniMRCluster)15 Path (org.apache.hadoop.fs.Path)8 JobConf (org.apache.hadoop.mapred.JobConf)8 Configuration (org.apache.hadoop.conf.Configuration)7 MiniDFSCluster (org.apache.hadoop.hdfs.MiniDFSCluster)7 FileSystem (org.apache.hadoop.fs.FileSystem)6 File (java.io.File)5 Before (org.junit.Before)5 Test (org.junit.Test)4 Utils (org.apache.hadoop.mapred.Utils)3 BeforeClass (org.junit.BeforeClass)3 BufferedReader (java.io.BufferedReader)2 DataOutputStream (java.io.DataOutputStream)2 IOException (java.io.IOException)2 InputStreamReader (java.io.InputStreamReader)2 InetAddress (java.net.InetAddress)2 ArrayList (java.util.ArrayList)2 Map (java.util.Map)2 HiveConf (org.apache.hadoop.hive.conf.HiveConf)2 FileOutputStream (java.io.FileOutputStream)1