Example 1 with MiniDFSCluster

Use of org.apache.hadoop.hdfs.MiniDFSCluster in project hadoop by apache.

From the class TestFSRMStateStore, method testFSRMStateStore.

@Test(timeout = 60000)
public void testFSRMStateStore() throws Exception {
    HdfsConfiguration conf = new HdfsConfiguration();
    MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).numDataNodes(1).build();
    try {
        fsTester = new TestFSRMStateStoreTester(cluster, false);
        // If the state store is FileSystemRMStateStore, add a corrupted entry;
        // the store should discard it and remove it from the file system.
        FSDataOutputStream fsOut = null;
        FileSystemRMStateStore fileSystemRMStateStore = (FileSystemRMStateStore) fsTester.getRMStateStore();
        String appAttemptIdStr3 = "appattempt_1352994193343_0001_000003";
        ApplicationAttemptId attemptId3 = ApplicationAttemptId.fromString(appAttemptIdStr3);
        Path appDir = fsTester.store.getAppDir(attemptId3.getApplicationId().toString());
        Path tempAppAttemptFile = new Path(appDir, attemptId3.toString() + ".tmp");
        fsOut = fileSystemRMStateStore.fs.create(tempAppAttemptFile, false);
        fsOut.write("Some random data ".getBytes());
        fsOut.close();
        testRMAppStateStore(fsTester);
        Assert.assertFalse(fsTester.workingDirPathURI.getFileSystem(conf).exists(tempAppAttemptFile));
        testRMDTSecretManagerStateStore(fsTester);
        testCheckVersion(fsTester);
        testEpoch(fsTester);
        testAppDeletion(fsTester);
        testDeleteStore(fsTester);
        testRemoveApplication(fsTester);
        testRemoveAttempt(fsTester);
        testAMRMTokenSecretManagerStateStore(fsTester);
        testReservationStateStore(fsTester);
    } finally {
        cluster.shutdown();
    }
}
Also used : Path(org.apache.hadoop.fs.Path) MiniDFSCluster(org.apache.hadoop.hdfs.MiniDFSCluster) FSDataOutputStream(org.apache.hadoop.fs.FSDataOutputStream) ApplicationAttemptId(org.apache.hadoop.yarn.api.records.ApplicationAttemptId) HdfsConfiguration(org.apache.hadoop.hdfs.HdfsConfiguration) Test(org.junit.Test)
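
A minimal sketch of the MiniDFSCluster lifecycle these examples rely on; the path and data below are illustrative, not taken from the test:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hdfs.MiniDFSCluster;

Configuration conf = new Configuration();
MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).numDataNodes(1).build();
try {
    // block until the NameNode and DataNodes have reported in
    cluster.waitActive();
    FileSystem fs = cluster.getFileSystem();
    Path file = new Path("/tmp/example");
    try (FSDataOutputStream out = fs.create(file)) {
        out.writeBytes("example data");
    }
    // the file is now visible through the normal FileSystem API
    assert fs.exists(file);
} finally {
    // always shut down to release ports and temporary directories
    cluster.shutdown();
}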

Example 2 with MiniDFSCluster

Use of org.apache.hadoop.hdfs.MiniDFSCluster in project hadoop by apache.

From the class TestFSRMStateStore, method testHDFSRMStateStore.

@Test(timeout = 60000)
public void testHDFSRMStateStore() throws Exception {
    final HdfsConfiguration conf = new HdfsConfiguration();
    UserGroupInformation yarnAdmin = UserGroupInformation.createUserForTesting("yarn", new String[] { "admin" });
    final MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).numDataNodes(1).build();
    cluster.getFileSystem().mkdir(new Path("/yarn"), FsPermission.valueOf("-rwxrwxrwx"));
    cluster.getFileSystem().setOwner(new Path("/yarn"), "yarn", "admin");
    final UserGroupInformation hdfsAdmin = UserGroupInformation.getCurrentUser();
    final StoreStateVerifier verifier = new StoreStateVerifier() {

        @Override
        void afterStoreApp(final RMStateStore store, final ApplicationId appId) {
            try {
                // Wait for things to settle
                Thread.sleep(5000);
                hdfsAdmin.doAs(new PrivilegedExceptionAction<Void>() {

                    @Override
                    public Void run() throws Exception {
                        verifyFilesUnreadablebyHDFS(cluster, ((FileSystemRMStateStore) store).getAppDir(appId));
                        return null;
                    }
                });
            } catch (Exception e) {
                throw new RuntimeException(e);
            }
        }

        @Override
        void afterStoreAppAttempt(final RMStateStore store, final ApplicationAttemptId appAttId) {
            try {
                // Wait for things to settle
                Thread.sleep(5000);
                hdfsAdmin.doAs(new PrivilegedExceptionAction<Void>() {

                    @Override
                    public Void run() throws Exception {
                        verifyFilesUnreadablebyHDFS(cluster, ((FileSystemRMStateStore) store).getAppAttemptDir(appAttId));
                        return null;
                    }
                });
            } catch (Exception e) {
                throw new RuntimeException(e);
            }
        }
    };
    try {
        yarnAdmin.doAs(new PrivilegedExceptionAction<Void>() {

            @Override
            public Void run() throws Exception {
                fsTester = new TestFSRMStateStoreTester(cluster, true);
                testRMAppStateStore(fsTester, verifier);
                return null;
            }
        });
    } finally {
        cluster.shutdown();
    }
}
Also used : Path(org.apache.hadoop.fs.Path) MiniDFSCluster(org.apache.hadoop.hdfs.MiniDFSCluster) ApplicationAttemptId(org.apache.hadoop.yarn.api.records.ApplicationAttemptId) HdfsConfiguration(org.apache.hadoop.hdfs.HdfsConfiguration) IOException(java.io.IOException) AccessControlException(org.apache.hadoop.security.AccessControlException) ApplicationId(org.apache.hadoop.yarn.api.records.ApplicationId) UserGroupInformation(org.apache.hadoop.security.UserGroupInformation) Test(org.junit.Test)
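
Example 2 hinges on UserGroupInformation.doAs to issue file-system calls as different users. A hedged sketch of that pattern; the user, group, and path names are placeholders:

import java.security.PrivilegedExceptionAction;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.security.UserGroupInformation;

final Configuration conf = new Configuration();
UserGroupInformation testUser =
    UserGroupInformation.createUserForTesting("alice", new String[] { "users" });
testUser.doAs(new PrivilegedExceptionAction<Void>() {
    @Override
    public Void run() throws Exception {
        // every call in here is authorized as "alice", so HDFS permission
        // checks behave as they would for a real remote user
        FileSystem fs = FileSystem.get(conf);
        fs.mkdirs(new Path("/user/alice"));
        return null;
    }
});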

Example 3 with MiniDFSCluster

Use of org.apache.hadoop.hdfs.MiniDFSCluster in project hadoop by apache.

From the class TestCombineFileInputFormat, method testSplitPlacement.

/**
   * The test suppresses unchecked warnings from
   * {@link org.mockito.Mockito#reset}. Although calling the method is
   * bad practice, we call it instead of splitting the test
   * (i.e. restarting the MiniDFSCluster) to save time.
   */
@Test
@SuppressWarnings("unchecked")
public void testSplitPlacement() throws Exception {
    MiniDFSCluster dfs = null;
    FileSystem fileSys = null;
    try {
        /* Start 3 datanodes, one each in rack r1, r2, r3. Create five files:
         * 1) file1 and file5, just after starting the datanode on r1, with
         *    a repl factor of 1, and,
         * 2) file2, just after starting the datanode on r2, with
         *    a repl factor of 2, and,
         * 3) file3 and file4, after starting all three datanodes, with a repl
         *    factor of 3.
         * At the end, file1 and file5 will be present only on datanode1, file2
         * will be present on datanode1 and datanode2, and file3 and file4 will
         * be present on all datanodes.
         */
        Configuration conf = new Configuration();
        conf.setBoolean("dfs.replication.considerLoad", false);
        dfs = new MiniDFSCluster.Builder(conf).racks(rack1).hosts(hosts1).build();
        dfs.waitActive();
        fileSys = dfs.getFileSystem();
        if (!fileSys.mkdirs(inDir)) {
            throw new IOException("Mkdirs failed to create " + inDir.toString());
        }
        Path file1 = new Path(dir1 + "/file1");
        writeFile(conf, file1, (short) 1, 1);
        // create another file on the same datanode
        Path file5 = new Path(dir5 + "/file5");
        writeFile(conf, file5, (short) 1, 1);
        // split it using a CombinedFile input format
        DummyInputFormat inFormat = new DummyInputFormat();
        Job job = Job.getInstance(conf);
        FileInputFormat.setInputPaths(job, dir1 + "," + dir5);
        List<InputSplit> splits = inFormat.getSplits(job);
        System.out.println("Made splits(Test0): " + splits.size());
        for (InputSplit split : splits) {
            System.out.println("File split(Test0): " + split);
        }
        assertEquals(1, splits.size());
        CombineFileSplit fileSplit = (CombineFileSplit) splits.get(0);
        assertEquals(2, fileSplit.getNumPaths());
        assertEquals(1, fileSplit.getLocations().length);
        assertEquals(file1.getName(), fileSplit.getPath(0).getName());
        assertEquals(0, fileSplit.getOffset(0));
        assertEquals(BLOCKSIZE, fileSplit.getLength(0));
        assertEquals(file5.getName(), fileSplit.getPath(1).getName());
        assertEquals(0, fileSplit.getOffset(1));
        assertEquals(BLOCKSIZE, fileSplit.getLength(1));
        assertEquals(hosts1[0], fileSplit.getLocations()[0]);
        dfs.startDataNodes(conf, 1, true, null, rack2, hosts2, null);
        dfs.waitActive();
        // create file on two datanodes.
        Path file2 = new Path(dir2 + "/file2");
        writeFile(conf, file2, (short) 2, 2);
        // split it using a CombinedFile input format
        inFormat = new DummyInputFormat();
        FileInputFormat.setInputPaths(job, dir1 + "," + dir2);
        inFormat.setMinSplitSizeRack(BLOCKSIZE);
        splits = inFormat.getSplits(job);
        System.out.println("Made splits(Test1): " + splits.size());
        for (InputSplit split : splits) {
            System.out.println("File split(Test1): " + split);
        }
        for (InputSplit split : splits) {
            fileSplit = (CombineFileSplit) split;
            /**
             * If rack1 is processed first by
             * {@link CombineFileInputFormat#createSplits},
             * create only one split on rack1. Otherwise create two splits.
             */
            if (splits.size() == 2) {
                // first split is on rack2, contains file2
                if (split.equals(splits.get(0))) {
                    assertEquals(2, fileSplit.getNumPaths());
                    assertEquals(1, fileSplit.getLocations().length);
                    assertEquals(file2.getName(), fileSplit.getPath(0).getName());
                    assertEquals(0, fileSplit.getOffset(0));
                    assertEquals(BLOCKSIZE, fileSplit.getLength(0));
                    assertEquals(file2.getName(), fileSplit.getPath(1).getName());
                    assertEquals(BLOCKSIZE, fileSplit.getOffset(1));
                    assertEquals(BLOCKSIZE, fileSplit.getLength(1));
                    assertEquals(hosts2[0], fileSplit.getLocations()[0]);
                }
                // second split is on rack1, contains file1
                if (split.equals(splits.get(1))) {
                    assertEquals(1, fileSplit.getNumPaths());
                    assertEquals(1, fileSplit.getLocations().length);
                    assertEquals(file1.getName(), fileSplit.getPath(0).getName());
                    assertEquals(0, fileSplit.getOffset(0));
                    assertEquals(BLOCKSIZE, fileSplit.getLength(0));
                    assertEquals(hosts1[0], fileSplit.getLocations()[0]);
                }
            } else if (splits.size() == 1) {
                // first split is on rack1, contains file1 and file2.
                assertEquals(3, fileSplit.getNumPaths());
                Set<Split> expected = new HashSet<>();
                expected.add(new Split(file1.getName(), BLOCKSIZE, 0));
                expected.add(new Split(file2.getName(), BLOCKSIZE, 0));
                expected.add(new Split(file2.getName(), BLOCKSIZE, BLOCKSIZE));
                List<Split> actual = new ArrayList<>();
                for (int i = 0; i < 3; i++) {
                    String name = fileSplit.getPath(i).getName();
                    long length = fileSplit.getLength(i);
                    long offset = fileSplit.getOffset(i);
                    actual.add(new Split(name, length, offset));
                }
                assertTrue(actual.containsAll(expected));
                assertEquals(1, fileSplit.getLocations().length);
                assertEquals(hosts1[0], fileSplit.getLocations()[0]);
            } else {
                fail("Expected split size is 1 or 2, but actual size is " + splits.size());
            }
        }
        // create another file on 3 datanodes and 3 racks.
        dfs.startDataNodes(conf, 1, true, null, rack3, hosts3, null);
        dfs.waitActive();
        Path file3 = new Path(dir3 + "/file3");
        writeFile(conf, file3, (short) 3, 3);
        inFormat = new DummyInputFormat();
        FileInputFormat.setInputPaths(job, dir1 + "," + dir2 + "," + dir3);
        inFormat.setMinSplitSizeRack(BLOCKSIZE);
        splits = inFormat.getSplits(job);
        for (InputSplit split : splits) {
            System.out.println("File split(Test2): " + split);
        }
        Set<Split> expected = new HashSet<>();
        expected.add(new Split(file1.getName(), BLOCKSIZE, 0));
        expected.add(new Split(file2.getName(), BLOCKSIZE, 0));
        expected.add(new Split(file2.getName(), BLOCKSIZE, BLOCKSIZE));
        expected.add(new Split(file3.getName(), BLOCKSIZE, 0));
        expected.add(new Split(file3.getName(), BLOCKSIZE, BLOCKSIZE));
        expected.add(new Split(file3.getName(), BLOCKSIZE, BLOCKSIZE * 2));
        List<Split> actual = new ArrayList<>();
        for (InputSplit split : splits) {
            fileSplit = (CombineFileSplit) split;
            /**
             * If rack1 is processed first by
             * {@link CombineFileInputFormat#createSplits},
             * create only one split on rack1.
             * If rack2 or rack3 is processed first and rack1 is processed second,
             * create one split on rack2 or rack3 and the other split on rack1.
             * Otherwise create three splits, one for each rack.
             */
            if (splits.size() == 3) {
                // first split is on rack3, contains file3
                if (split.equals(splits.get(0))) {
                    assertEquals(3, fileSplit.getNumPaths());
                    assertEquals(1, fileSplit.getLocations().length);
                    assertEquals(file3.getName(), fileSplit.getPath(0).getName());
                    assertEquals(0, fileSplit.getOffset(0));
                    assertEquals(BLOCKSIZE, fileSplit.getLength(0));
                    assertEquals(file3.getName(), fileSplit.getPath(1).getName());
                    assertEquals(BLOCKSIZE, fileSplit.getOffset(1));
                    assertEquals(BLOCKSIZE, fileSplit.getLength(1));
                    assertEquals(file3.getName(), fileSplit.getPath(2).getName());
                    assertEquals(2 * BLOCKSIZE, fileSplit.getOffset(2));
                    assertEquals(BLOCKSIZE, fileSplit.getLength(2));
                    assertEquals(hosts3[0], fileSplit.getLocations()[0]);
                }
                // second split is on rack2, contains file2
                if (split.equals(splits.get(1))) {
                    assertEquals(2, fileSplit.getNumPaths());
                    assertEquals(1, fileSplit.getLocations().length);
                    assertEquals(file2.getName(), fileSplit.getPath(0).getName());
                    assertEquals(0, fileSplit.getOffset(0));
                    assertEquals(BLOCKSIZE, fileSplit.getLength(0));
                    assertEquals(file2.getName(), fileSplit.getPath(1).getName());
                    assertEquals(BLOCKSIZE, fileSplit.getOffset(1));
                    assertEquals(BLOCKSIZE, fileSplit.getLength(1));
                    assertEquals(hosts2[0], fileSplit.getLocations()[0]);
                }
                // third split is on rack1, contains file1
                if (split.equals(splits.get(2))) {
                    assertEquals(1, fileSplit.getNumPaths());
                    assertEquals(1, fileSplit.getLocations().length);
                    assertEquals(file1.getName(), fileSplit.getPath(0).getName());
                    assertEquals(0, fileSplit.getOffset(0));
                    assertEquals(BLOCKSIZE, fileSplit.getLength(0));
                    assertEquals(hosts1[0], fileSplit.getLocations()[0]);
                }
            } else if (splits.size() == 2) {
                // first split is on rack2 or rack3, containing file2's two
                // blocks or file3's three blocks respectively.
                if (split.equals(splits.get(0))) {
                    assertEquals(1, fileSplit.getLocations().length);
                    if (fileSplit.getLocations()[0].equals(hosts2[0])) {
                        assertEquals(2, fileSplit.getNumPaths());
                    } else if (fileSplit.getLocations()[0].equals(hosts3[0])) {
                        assertEquals(3, fileSplit.getNumPaths());
                    } else {
                        fail("First split should be on rack2 or rack3.");
                    }
                }
                // second split is on rack1, contains the remaining files.
                if (split.equals(splits.get(1))) {
                    assertEquals(1, fileSplit.getLocations().length);
                    assertEquals(hosts1[0], fileSplit.getLocations()[0]);
                }
            } else if (splits.size() == 1) {
                // the only split is on rack1 and contains all three files.
                assertEquals(1, fileSplit.getLocations().length);
                assertEquals(6, fileSplit.getNumPaths());
                assertEquals(hosts1[0], fileSplit.getLocations()[0]);
            } else {
                fail("Split size should be 1, 2, or 3.");
            }
            for (int i = 0; i < fileSplit.getNumPaths(); i++) {
                String name = fileSplit.getPath(i).getName();
                long length = fileSplit.getLength(i);
                long offset = fileSplit.getOffset(i);
                actual.add(new Split(name, length, offset));
            }
        }
        assertEquals(6, actual.size());
        assertTrue(actual.containsAll(expected));
        // create file4 on all three racks
        Path file4 = new Path(dir4 + "/file4");
        writeFile(conf, file4, (short) 3, 3);
        inFormat = new DummyInputFormat();
        FileInputFormat.setInputPaths(job, dir1 + "," + dir2 + "," + dir3 + "," + dir4);
        inFormat.setMinSplitSizeRack(BLOCKSIZE);
        splits = inFormat.getSplits(job);
        for (InputSplit split : splits) {
            System.out.println("File split(Test3): " + split);
        }
        expected.add(new Split(file4.getName(), BLOCKSIZE, 0));
        expected.add(new Split(file4.getName(), BLOCKSIZE, BLOCKSIZE));
        expected.add(new Split(file4.getName(), BLOCKSIZE, BLOCKSIZE * 2));
        actual.clear();
        for (InputSplit split : splits) {
            fileSplit = (CombineFileSplit) split;
            /**
             * If rack1 is processed first by
             * {@link CombineFileInputFormat#createSplits},
             * create only one split on rack1.
             * If rack2 or rack3 is processed first and rack1 is processed second,
             * create one split on rack2 or rack3 and the other split on rack1.
             * Otherwise create three splits, one for each rack.
             */
            if (splits.size() == 3) {
                // first split is on rack3, contains file3 and file4
                if (split.equals(splits.get(0))) {
                    assertEquals(6, fileSplit.getNumPaths());
                    assertEquals(1, fileSplit.getLocations().length);
                    assertEquals(hosts3[0], fileSplit.getLocations()[0]);
                }
                // second split is on rack2, contains file2
                if (split.equals(splits.get(1))) {
                    assertEquals(2, fileSplit.getNumPaths());
                    assertEquals(1, fileSplit.getLocations().length);
                    assertEquals(file2.getName(), fileSplit.getPath(0).getName());
                    assertEquals(0, fileSplit.getOffset(0));
                    assertEquals(BLOCKSIZE, fileSplit.getLength(0));
                    assertEquals(file2.getName(), fileSplit.getPath(1).getName());
                    assertEquals(BLOCKSIZE, fileSplit.getOffset(1));
                    assertEquals(BLOCKSIZE, fileSplit.getLength(1));
                    assertEquals(hosts2[0], fileSplit.getLocations()[0]);
                }
                // third split is on rack1, contains file1
                if (split.equals(splits.get(2))) {
                    assertEquals(1, fileSplit.getNumPaths());
                    assertEquals(1, fileSplit.getLocations().length);
                    assertEquals(file1.getName(), fileSplit.getPath(0).getName());
                    assertEquals(0, fileSplit.getOffset(0));
                    assertEquals(BLOCKSIZE, fileSplit.getLength(0));
                    assertEquals(hosts1[0], fileSplit.getLocations()[0]);
                }
            } else if (splits.size() == 2) {
                // first split is on rack2 (file2 and file4) or rack3
                // (file3 and file4).
                if (split.equals(splits.get(0))) {
                    assertEquals(1, fileSplit.getLocations().length);
                    if (fileSplit.getLocations()[0].equals(hosts2[0])) {
                        assertEquals(5, fileSplit.getNumPaths());
                    } else if (fileSplit.getLocations()[0].equals(hosts3[0])) {
                        assertEquals(6, fileSplit.getNumPaths());
                    } else {
                        fail("First split should be on rack2 or rack3.");
                    }
                }
                // second split is on rack1, contains the remaining files.
                if (split.equals(splits.get(1))) {
                    assertEquals(1, fileSplit.getLocations().length);
                    assertEquals(hosts1[0], fileSplit.getLocations()[0]);
                }
            } else if (splits.size() == 1) {
                // the only split is on rack1 and contains all four files.
                assertEquals(1, fileSplit.getLocations().length);
                assertEquals(9, fileSplit.getNumPaths());
                assertEquals(hosts1[0], fileSplit.getLocations()[0]);
            } else {
                fail("Split size should be 1, 2, or 3.");
            }
            for (int i = 0; i < fileSplit.getNumPaths(); i++) {
                String name = fileSplit.getPath(i).getName();
                long length = fileSplit.getLength(i);
                long offset = fileSplit.getOffset(i);
                actual.add(new Split(name, length, offset));
            }
        }
        assertEquals(9, actual.size());
        assertTrue(actual.containsAll(expected));
        // maximum split size is 2 blocks 
        inFormat = new DummyInputFormat();
        inFormat.setMinSplitSizeNode(BLOCKSIZE);
        inFormat.setMaxSplitSize(2 * BLOCKSIZE);
        FileInputFormat.setInputPaths(job, dir1 + "," + dir2 + "," + dir3 + "," + dir4);
        splits = inFormat.getSplits(job);
        for (InputSplit split : splits) {
            System.out.println("File split(Test4): " + split);
        }
        assertEquals(5, splits.size());
        actual.clear();
        reset(mockList);
        for (InputSplit split : splits) {
            fileSplit = (CombineFileSplit) split;
            for (int i = 0; i < fileSplit.getNumPaths(); i++) {
                String name = fileSplit.getPath(i).getName();
                long length = fileSplit.getLength(i);
                long offset = fileSplit.getOffset(i);
                actual.add(new Split(name, length, offset));
            }
            mockList.add(fileSplit.getLocations()[0]);
        }
        assertEquals(9, actual.size());
        assertTrue(actual.containsAll(expected));
        // verify the splits are on all the racks
        verify(mockList, atLeastOnce()).add(hosts1[0]);
        verify(mockList, atLeastOnce()).add(hosts2[0]);
        verify(mockList, atLeastOnce()).add(hosts3[0]);
        // maximum split size is 3 blocks 
        inFormat = new DummyInputFormat();
        inFormat.setMinSplitSizeNode(BLOCKSIZE);
        inFormat.setMaxSplitSize(3 * BLOCKSIZE);
        FileInputFormat.setInputPaths(job, dir1 + "," + dir2 + "," + dir3 + "," + dir4);
        splits = inFormat.getSplits(job);
        for (InputSplit split : splits) {
            System.out.println("File split(Test5): " + split);
        }
        assertEquals(3, splits.size());
        actual.clear();
        reset(mockList);
        for (InputSplit split : splits) {
            fileSplit = (CombineFileSplit) split;
            for (int i = 0; i < fileSplit.getNumPaths(); i++) {
                String name = fileSplit.getPath(i).getName();
                long length = fileSplit.getLength(i);
                long offset = fileSplit.getOffset(i);
                actual.add(new Split(name, length, offset));
            }
            mockList.add(fileSplit.getLocations()[0]);
        }
        assertEquals(9, actual.size());
        assertTrue(actual.containsAll(expected));
        verify(mockList, atLeastOnce()).add(hosts1[0]);
        verify(mockList, atLeastOnce()).add(hosts2[0]);
        // maximum split size is 4 blocks 
        inFormat = new DummyInputFormat();
        inFormat.setMaxSplitSize(4 * BLOCKSIZE);
        FileInputFormat.setInputPaths(job, dir1 + "," + dir2 + "," + dir3 + "," + dir4);
        splits = inFormat.getSplits(job);
        for (InputSplit split : splits) {
            System.out.println("File split(Test6): " + split);
        }
        assertEquals(3, splits.size());
        actual.clear();
        reset(mockList);
        for (InputSplit split : splits) {
            fileSplit = (CombineFileSplit) split;
            for (int i = 0; i < fileSplit.getNumPaths(); i++) {
                String name = fileSplit.getPath(i).getName();
                long length = fileSplit.getLength(i);
                long offset = fileSplit.getOffset(i);
                actual.add(new Split(name, length, offset));
            }
            mockList.add(fileSplit.getLocations()[0]);
        }
        assertEquals(9, actual.size());
        assertTrue(actual.containsAll(expected));
        verify(mockList, atLeastOnce()).add(hosts1[0]);
        // maximum split size is 7 blocks and min is 3 blocks
        inFormat = new DummyInputFormat();
        inFormat.setMaxSplitSize(7 * BLOCKSIZE);
        inFormat.setMinSplitSizeNode(3 * BLOCKSIZE);
        inFormat.setMinSplitSizeRack(3 * BLOCKSIZE);
        FileInputFormat.setInputPaths(job, dir1 + "," + dir2 + "," + dir3 + "," + dir4);
        splits = inFormat.getSplits(job);
        for (InputSplit split : splits) {
            System.out.println("File split(Test7): " + split);
        }
        assertEquals(2, splits.size());
        actual.clear();
        reset(mockList);
        for (InputSplit split : splits) {
            fileSplit = (CombineFileSplit) split;
            for (int i = 0; i < fileSplit.getNumPaths(); i++) {
                String name = fileSplit.getPath(i).getName();
                long length = fileSplit.getLength(i);
                long offset = fileSplit.getOffset(i);
                actual.add(new Split(name, length, offset));
            }
            mockList.add(fileSplit.getLocations()[0]);
        }
        assertEquals(9, actual.size());
        assertTrue(actual.containsAll(expected));
        verify(mockList, atLeastOnce()).add(hosts1[0]);
        // Rack 1 has file1, file2, file3 and file4
        // Rack 2 has file2, file3 and file4
        // Rack 3 has file3 and file4
        // set up a filter so that only (file1 and file2) or (file3 and file4)
        // can be combined
        inFormat = new DummyInputFormat();
        FileInputFormat.addInputPath(job, inDir);
        // everything is at least rack local
        inFormat.setMinSplitSizeRack(1);
        inFormat.createPool(new TestFilter(dir1), new TestFilter(dir2));
        splits = inFormat.getSplits(job);
        for (InputSplit split : splits) {
            System.out.println("File split(Test1): " + split);
        }
        for (InputSplit split : splits) {
            fileSplit = (CombineFileSplit) split;
            if (splits.size() == 2) {
                // first split is on rack1, contains file1 and file2.
                if (split.equals(splits.get(0))) {
                    assertEquals(3, fileSplit.getNumPaths());
                    expected.clear();
                    expected.add(new Split(file1.getName(), BLOCKSIZE, 0));
                    expected.add(new Split(file2.getName(), BLOCKSIZE, 0));
                    expected.add(new Split(file2.getName(), BLOCKSIZE, BLOCKSIZE));
                    actual.clear();
                    for (int i = 0; i < 3; i++) {
                        String name = fileSplit.getPath(i).getName();
                        long length = fileSplit.getLength(i);
                        long offset = fileSplit.getOffset(i);
                        actual.add(new Split(name, length, offset));
                    }
                    assertTrue(actual.containsAll(expected));
                    assertEquals(1, fileSplit.getLocations().length);
                    assertEquals(hosts1[0], fileSplit.getLocations()[0]);
                }
                if (split.equals(splits.get(1))) {
                    // second split contains file3 and file4; however,
                    // its location is undetermined.
                    assertEquals(6, fileSplit.getNumPaths());
                    expected.clear();
                    expected.add(new Split(file3.getName(), BLOCKSIZE, 0));
                    expected.add(new Split(file3.getName(), BLOCKSIZE, BLOCKSIZE));
                    expected.add(new Split(file3.getName(), BLOCKSIZE, BLOCKSIZE * 2));
                    expected.add(new Split(file4.getName(), BLOCKSIZE, 0));
                    expected.add(new Split(file4.getName(), BLOCKSIZE, BLOCKSIZE));
                    expected.add(new Split(file4.getName(), BLOCKSIZE, BLOCKSIZE * 2));
                    actual.clear();
                    for (int i = 0; i < 6; i++) {
                        String name = fileSplit.getPath(i).getName();
                        long length = fileSplit.getLength(i);
                        long offset = fileSplit.getOffset(i);
                        actual.add(new Split(name, length, offset));
                    }
                    assertTrue(actual.containsAll(expected));
                    assertEquals(1, fileSplit.getLocations().length);
                }
            } else if (splits.size() == 3) {
                if (split.equals(splits.get(0))) {
                    // first split is on rack2, contains file2
                    assertEquals(2, fileSplit.getNumPaths());
                    expected.clear();
                    expected.add(new Split(file2.getName(), BLOCKSIZE, 0));
                    expected.add(new Split(file2.getName(), BLOCKSIZE, BLOCKSIZE));
                    actual.clear();
                    for (int i = 0; i < 2; i++) {
                        String name = fileSplit.getPath(i).getName();
                        long length = fileSplit.getLength(i);
                        long offset = fileSplit.getOffset(i);
                        actual.add(new Split(name, length, offset));
                    }
                    assertTrue(actual.containsAll(expected));
                    assertEquals(1, fileSplit.getLocations().length);
                    assertEquals(hosts2[0], fileSplit.getLocations()[0]);
                }
                if (split.equals(splits.get(1))) {
                    // second split is on rack1, contains file1
                    assertEquals(1, fileSplit.getNumPaths());
                    assertEquals(file1.getName(), fileSplit.getPath(0).getName());
                    assertEquals(BLOCKSIZE, fileSplit.getLength(0));
                    assertEquals(0, fileSplit.getOffset(0));
                    assertEquals(1, fileSplit.getLocations().length);
                    assertEquals(hosts1[0], fileSplit.getLocations()[0]);
                }
                if (split.equals(splits.get(2))) {
                    // third split contains file3 and file4; however,
                    // its location is undetermined.
                    assertEquals(6, fileSplit.getNumPaths());
                    expected.clear();
                    expected.add(new Split(file3.getName(), BLOCKSIZE, 0));
                    expected.add(new Split(file3.getName(), BLOCKSIZE, BLOCKSIZE));
                    expected.add(new Split(file3.getName(), BLOCKSIZE, BLOCKSIZE * 2));
                    expected.add(new Split(file4.getName(), BLOCKSIZE, 0));
                    expected.add(new Split(file4.getName(), BLOCKSIZE, BLOCKSIZE));
                    expected.add(new Split(file4.getName(), BLOCKSIZE, BLOCKSIZE * 2));
                    actual.clear();
                    for (int i = 0; i < 6; i++) {
                        String name = fileSplit.getPath(i).getName();
                        long length = fileSplit.getLength(i);
                        long offset = fileSplit.getOffset(i);
                        actual.add(new Split(name, length, offset));
                    }
                    assertTrue(actual.containsAll(expected));
                    assertEquals(1, fileSplit.getLocations().length);
                }
            } else {
                fail("Split size should be 2 or 3.");
            }
        }
        // measure performance when there are multiple pools and
        // many files in each pool.
        int numPools = 100;
        int numFiles = 1000;
        DummyInputFormat1 inFormat1 = new DummyInputFormat1();
        for (int i = 0; i < numFiles; i++) {
            // use addInputPath rather than setInputPaths (which overwrites the
            // previous value) so the job really accumulates numFiles paths
            FileInputFormat.addInputPath(job, file1);
        }
        // everything is at least rack local
        inFormat1.setMinSplitSizeRack(1);
        final Path dirNoMatch1 = new Path(inDir, "/dirxx");
        final Path dirNoMatch2 = new Path(inDir, "/diryy");
        for (int i = 0; i < numPools; i++) {
            inFormat1.createPool(new TestFilter(dirNoMatch1), new TestFilter(dirNoMatch2));
        }
        long start = System.currentTimeMillis();
        splits = inFormat1.getSplits(job);
        long end = System.currentTimeMillis();
        System.out.println("Elapsed time for " + numPools + " pools " + " and " + numFiles + " files is " + ((end - start) / 1000) + " seconds.");
        // This file has three whole blocks. If the maxsplit size is
        // half the block size, then there should be six splits.
        inFormat = new DummyInputFormat();
        inFormat.setMaxSplitSize(BLOCKSIZE / 2);
        FileInputFormat.setInputPaths(job, dir3);
        splits = inFormat.getSplits(job);
        for (InputSplit split : splits) {
            System.out.println("File split(Test8): " + split);
        }
        assertEquals(6, splits.size());
    } finally {
        if (dfs != null) {
            dfs.shutdown();
        }
    }
}
Also used : Set(java.util.Set) HashSet(java.util.HashSet) Configuration(org.apache.hadoop.conf.Configuration) ArrayList(java.util.ArrayList) FileSystem(org.apache.hadoop.fs.FileSystem) DistributedFileSystem(org.apache.hadoop.hdfs.DistributedFileSystem) List(java.util.List) ArrayList(java.util.ArrayList) Job(org.apache.hadoop.mapreduce.Job) InputSplit(org.apache.hadoop.mapreduce.InputSplit) HashSet(java.util.HashSet) Path(org.apache.hadoop.fs.Path) MiniDFSCluster(org.apache.hadoop.hdfs.MiniDFSCluster) IOException(java.io.IOException) InputSplit(org.apache.hadoop.mapreduce.InputSplit) Test(org.junit.Test)
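
The setMaxSplitSize/setMinSplitSizeRack setters called above are protected in CombineFileInputFormat and reachable here only because the test and its DummyInputFormat sit in the same package. Outside that package the equivalent knob is usually a configuration key; a rough sketch, assuming the Hadoop 2.x key mapreduce.input.fileinputformat.split.maxsize and a placeholder input directory:

import java.util.List;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.CombineTextInputFormat;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;

Job job = Job.getInstance(new Configuration());
FileInputFormat.setInputPaths(job, new Path("/data"));
// cap each combined split at 256 MB (assumed 2.x key name)
job.getConfiguration().setLong(
    "mapreduce.input.fileinputformat.split.maxsize", 256L * 1024 * 1024);
job.setInputFormatClass(CombineTextInputFormat.class);
// many small files under /data get packed into few, rack-aware splits
List<InputSplit> splits = new CombineTextInputFormat().getSplits(job);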

Example 4 with MiniDFSCluster

Use of org.apache.hadoop.hdfs.MiniDFSCluster in project hadoop by apache.

From the class TestDelegatingInputFormat, method testSplitting.

@SuppressWarnings("unchecked")
public void testSplitting() throws Exception {
    Job job = Job.getInstance();
    MiniDFSCluster dfs = null;
    try {
        dfs = new MiniDFSCluster.Builder(job.getConfiguration()).numDataNodes(4).racks(new String[] { "/rack0", "/rack0", "/rack1", "/rack1" }).hosts(new String[] { "host0", "host1", "host2", "host3" }).build();
        FileSystem fs = dfs.getFileSystem();
        Path path = getPath("/foo/bar", fs);
        Path path2 = getPath("/foo/baz", fs);
        Path path3 = getPath("/bar/bar", fs);
        Path path4 = getPath("/bar/baz", fs);
        final int numSplits = 100;
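        // cap the max split size so the first file (path) fans out into
        // roughly numSplits splits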
        FileInputFormat.setMaxInputSplitSize(job, fs.getFileStatus(path).getLen() / numSplits);
        MultipleInputs.addInputPath(job, path, TextInputFormat.class, MapClass.class);
        MultipleInputs.addInputPath(job, path2, TextInputFormat.class, MapClass2.class);
        MultipleInputs.addInputPath(job, path3, KeyValueTextInputFormat.class, MapClass.class);
        MultipleInputs.addInputPath(job, path4, TextInputFormat.class, MapClass2.class);
        DelegatingInputFormat inFormat = new DelegatingInputFormat();
        int[] bins = new int[3];
        for (InputSplit split : (List<InputSplit>) inFormat.getSplits(job)) {
            assertTrue(split instanceof TaggedInputSplit);
            final TaggedInputSplit tis = (TaggedInputSplit) split;
            int index = -1;
            if (tis.getInputFormatClass().equals(KeyValueTextInputFormat.class)) {
                // path3
                index = 0;
            } else if (tis.getMapperClass().equals(MapClass.class)) {
                // path
                index = 1;
            } else {
                // path2 and path4
                index = 2;
            }
            bins[index]++;
        }
        assertEquals("count is not equal to num splits", numSplits, bins[0]);
        assertEquals("count is not equal to num splits", numSplits, bins[1]);
        assertEquals("count is not equal to 2 * num splits", numSplits * 2, bins[2]);
    } finally {
        if (dfs != null) {
            dfs.shutdown();
        }
    }
}
Also used : Path(org.apache.hadoop.fs.Path) MiniDFSCluster(org.apache.hadoop.hdfs.MiniDFSCluster) FileSystem(org.apache.hadoop.fs.FileSystem) List(java.util.List) Job(org.apache.hadoop.mapreduce.Job) InputSplit(org.apache.hadoop.mapreduce.InputSplit)
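
MultipleInputs is what produces the TaggedInputSplit instances the test counts: it installs DelegatingInputFormat as the job's input format. A minimal wiring sketch; the paths are placeholders and the identity Mapper stands in for real mapper classes:

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.KeyValueTextInputFormat;
import org.apache.hadoop.mapreduce.lib.input.MultipleInputs;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;

Job job = Job.getInstance();
MultipleInputs.addInputPath(job, new Path("/logs/plain"),
    TextInputFormat.class, Mapper.class);
MultipleInputs.addInputPath(job, new Path("/logs/kv"),
    KeyValueTextInputFormat.class, Mapper.class);
// each resulting split is a TaggedInputSplit carrying its format and mapper,
// which is exactly what the test above asserts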

Example 5 with MiniDFSCluster

Use of org.apache.hadoop.hdfs.MiniDFSCluster in project hadoop by apache.

From the class TestCommandLineJobSubmission, method testJobShell.

@Test
public void testJobShell() throws Exception {
    MiniDFSCluster dfs = null;
    MiniMRCluster mr = null;
    FileSystem fs = null;
    Path testFile = new Path(input, "testfile");
    try {
        Configuration conf = new Configuration();
        // start the mini DFS and MR clusters.
        dfs = new MiniDFSCluster.Builder(conf).numDataNodes(2).build();
        fs = dfs.getFileSystem();
        FSDataOutputStream stream = fs.create(testFile);
        stream.write("teststring".getBytes());
        stream.close();
        mr = new MiniMRCluster(2, fs.getUri().toString(), 1);
        File thisbuildDir = new File(buildDir, "jobCommand");
        assertTrue("create build dir", thisbuildDir.mkdirs());
        File f = new File(thisbuildDir, "files_tmp");
        FileOutputStream fstream = new FileOutputStream(f);
        fstream.write("somestrings".getBytes());
        fstream.close();
        File f1 = new File(thisbuildDir, "files_tmp1");
        fstream = new FileOutputStream(f1);
        fstream.write("somestrings".getBytes());
        fstream.close();
        // copy files to dfs
        Path cachePath = new Path("/cacheDir");
        if (!fs.mkdirs(cachePath)) {
            throw new IOException("Mkdirs failed to create " + cachePath.toString());
        }
        Path localCachePath = new Path(System.getProperty("test.cache.data"));
        Path txtPath = new Path(localCachePath, new Path("test.txt"));
        Path jarPath = new Path(localCachePath, new Path("test.jar"));
        Path zipPath = new Path(localCachePath, new Path("test.zip"));
        Path tarPath = new Path(localCachePath, new Path("test.tar"));
        Path tgzPath = new Path(localCachePath, new Path("test.tgz"));
        fs.copyFromLocalFile(txtPath, cachePath);
        fs.copyFromLocalFile(jarPath, cachePath);
        fs.copyFromLocalFile(zipPath, cachePath);
        // construct options for -files
        String[] files = new String[3];
        files[0] = f.toString();
        files[1] = f1.toString() + "#localfilelink";
        files[2] = fs.getUri().resolve(cachePath + "/test.txt#dfsfilelink").toString();
        // construct options for -libjars
        String[] libjars = new String[2];
        libjars[0] = "build/test/mapred/testjar/testjob.jar";
        libjars[1] = fs.getUri().resolve(cachePath + "/test.jar").toString();
        // construct options for archives
        String[] archives = new String[3];
        archives[0] = tgzPath.toString();
        archives[1] = tarPath + "#tarlink";
        archives[2] = fs.getUri().resolve(cachePath + "/test.zip#ziplink").toString();
        String[] args = new String[10];
        args[0] = "-files";
        args[1] = StringUtils.arrayToString(files);
        args[2] = "-libjars";
        // pass the prebuilt testjob.jar via -libjars
        // rather than creating a new jar
        args[3] = StringUtils.arrayToString(libjars);
        args[4] = "-archives";
        args[5] = StringUtils.arrayToString(archives);
        args[6] = "-D";
        args[7] = "mapred.output.committer.class=testjar.CustomOutputCommitter";
        args[8] = input.toString();
        args[9] = output.toString();
        JobConf jobConf = mr.createJobConf();
        // before running the job, verify that the libjar is not in the client classpath
        assertTrue("libjar not in client classpath", loadLibJar(jobConf) == null);
        int ret = ToolRunner.run(jobConf, new testshell.ExternalMapReduce(), args);
        // after running the job, verify that the libjar is in the client classpath
        assertTrue("libjar added to client classpath", loadLibJar(jobConf) != null);
        assertTrue("job should not fail", ret != -1);
        f.delete();
        thisbuildDir.delete();
    } finally {
        if (dfs != null) {
            dfs.shutdown();
        }
        if (mr != null) {
            mr.shutdown();
        }
    }
}
Also used : MiniDFSCluster(org.apache.hadoop.hdfs.MiniDFSCluster) Configuration(org.apache.hadoop.conf.Configuration) IOException(java.io.IOException) FileOutputStream(java.io.FileOutputStream) File(java.io.File) Test(org.junit.Test)
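
The -files, -libjars, -archives and -D options above are generic options: GenericOptionsParser (invoked by ToolRunner.run) consumes them before the tool's own arguments reach run(). A minimal Tool skeleton showing that contract; the class and job names are hypothetical:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

public class ExampleDriver extends Configured implements Tool {
    @Override
    public int run(String[] args) throws Exception {
        // args now holds only the leftover arguments (e.g. input and output
        // dirs); getConf() already reflects any -D overrides
        Job job = Job.getInstance(getConf(), "example-job");
        // ... mapper/reducer/IO setup elided ...
        return job.waitForCompletion(true) ? 0 : 1;
    }

    public static void main(String[] args) throws Exception {
        System.exit(ToolRunner.run(new Configuration(), new ExampleDriver(), args));
    }
}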

Aggregations

MiniDFSCluster (org.apache.hadoop.hdfs.MiniDFSCluster)545 Test (org.junit.Test)442 Configuration (org.apache.hadoop.conf.Configuration)418 HdfsConfiguration (org.apache.hadoop.hdfs.HdfsConfiguration)316 Path (org.apache.hadoop.fs.Path)302 FileSystem (org.apache.hadoop.fs.FileSystem)219 DistributedFileSystem (org.apache.hadoop.hdfs.DistributedFileSystem)193 IOException (java.io.IOException)111 File (java.io.File)88 FSDataOutputStream (org.apache.hadoop.fs.FSDataOutputStream)67 ExtendedBlock (org.apache.hadoop.hdfs.protocol.ExtendedBlock)53 DataNode (org.apache.hadoop.hdfs.server.datanode.DataNode)35 RandomAccessFile (java.io.RandomAccessFile)33 MetricsRecordBuilder (org.apache.hadoop.metrics2.MetricsRecordBuilder)33 ArrayList (java.util.ArrayList)30 URI (java.net.URI)29 LocatedBlock (org.apache.hadoop.hdfs.protocol.LocatedBlock)28 FsPermission (org.apache.hadoop.fs.permission.FsPermission)26 FSNamesystem (org.apache.hadoop.hdfs.server.namenode.FSNamesystem)26 HttpServerFunctionalTest (org.apache.hadoop.http.HttpServerFunctionalTest)24