
Example 36 with Set

Use of java.util.Set in project hadoop by apache.

The class RMNodeLabelsManager, method getModifiedNodeLabelsMappings.

private Map<NodeId, Set<String>> getModifiedNodeLabelsMappings(Map<NodeId, Set<String>> replaceLabelsToNode) {
    Map<NodeId, Set<String>> effectiveModifiedLabels = new HashMap<>();
    for (Entry<NodeId, Set<String>> nodeLabelMappingEntry : replaceLabelsToNode.entrySet()) {
        NodeId nodeId = nodeLabelMappingEntry.getKey();
        Set<String> modifiedNodeLabels = nodeLabelMappingEntry.getValue();
        Set<String> labelsBeforeModification = null;
        Host host = nodeCollections.get(nodeId.getHost());
        if (host == null) {
            effectiveModifiedLabels.put(nodeId, modifiedNodeLabels);
            continue;
        } else if (nodeId.getPort() == WILDCARD_PORT) {
            labelsBeforeModification = host.labels;
        } else if (host.nms.get(nodeId) != null) {
            labelsBeforeModification = host.nms.get(nodeId).labels;
        }
        if (labelsBeforeModification == null || labelsBeforeModification.size() != modifiedNodeLabels.size() || !labelsBeforeModification.containsAll(modifiedNodeLabels)) {
            effectiveModifiedLabels.put(nodeId, modifiedNodeLabels);
        }
    }
    return effectiveModifiedLabels;
}
Also used : ImmutableSet(com.google.common.collect.ImmutableSet) Set(java.util.Set) HashSet(java.util.HashSet) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) HashMap(java.util.HashMap) NodeId(org.apache.hadoop.yarn.api.records.NodeId)
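
The check above treats a node's mapping as unchanged only when the old and new label sets are equal, tested via size() plus containsAll() rather than Set.equals(). A minimal standalone sketch of that equivalence test (the helper name is hypothetical, not part of the Hadoop source):

import java.util.Set;

// Two label sets are equivalent when neither adds nor removes a label.
// The size comparison short-circuits before the O(n) containsAll scan;
// Set.equals() performs the same two steps internally.
static boolean sameLabels(Set<String> before, Set<String> after) {
    return before != null
            && before.size() == after.size()
            && before.containsAll(after);
}

// sameLabels(Set.of("gpu"), Set.of("gpu"))        -> true,  entry skipped
// sameLabels(Set.of("gpu"), Set.of("gpu", "ssd")) -> false, entry kept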

Example 37 with Set

Use of java.util.Set in project hadoop by apache.

The class RMNodeLabelsManager, method reinitializeQueueLabels.

public void reinitializeQueueLabels(Map<String, Set<String>> queueToLabels) {
    try {
        writeLock.lock();
        // clear before set
        this.queueCollections.clear();
        for (Entry<String, Set<String>> entry : queueToLabels.entrySet()) {
            String queue = entry.getKey();
            Queue q = new Queue();
            this.queueCollections.put(queue, q);
            Set<String> labels = entry.getValue();
            if (labels.contains(ANY)) {
                continue;
            }
            q.accessibleNodeLabels.addAll(labels);
            for (Host host : nodeCollections.values()) {
                for (Entry<NodeId, Node> nentry : host.nms.entrySet()) {
                    NodeId nodeId = nentry.getKey();
                    Node nm = nentry.getValue();
                    if (nm.running && isNodeUsableByQueue(getLabelsByNode(nodeId), q)) {
                        Resources.addTo(q.resource, nm.resource);
                    }
                }
            }
        }
    } finally {
        writeLock.unlock();
    }
}
Also used : ImmutableSet(com.google.common.collect.ImmutableSet) Set(java.util.Set) HashSet(java.util.HashSet) NodeId(org.apache.hadoop.yarn.api.records.NodeId)
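
Note that reinitializeQueueLabels calls writeLock.lock() inside the try block. That is harmless with a ReentrantReadWriteLock, whose lock() does not throw, but the conventional idiom acquires the lock just before try so that a failed acquisition can never reach the unlock() in finally. A short sketch of that pattern (class and method names are hypothetical):

import java.util.concurrent.locks.ReentrantReadWriteLock;

class QueueLabelStore {
    private final ReentrantReadWriteLock lock = new ReentrantReadWriteLock();

    void rebuild() {
        // Acquire before try: if lock() could fail, finally would never run
        // against a lock we do not hold.
        lock.writeLock().lock();
        try {
            // clear and repopulate queue state under exclusive access
        } finally {
            lock.writeLock().unlock();
        }
    }
}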

Example 38 with Set

Use of java.util.Set in project hadoop by apache.

The class TestCombineFileInputFormat, method testNodeInputSplit.

@Test
public void testNodeInputSplit() throws IOException, InterruptedException {
    // Regression test for MAPREDUCE-4892. There are 2 nodes with all blocks on 
    // both nodes. The grouping ensures that both nodes get splits instead of 
    // just the first node
    DummyInputFormat inFormat = new DummyInputFormat();
    int numBlocks = 12;
    long totLength = 0;
    long blockSize = 100;
    long maxSize = 200;
    long minSizeNode = 50;
    long minSizeRack = 50;
    String[] locations = { "h1", "h2" };
    String[] racks = new String[0];
    Path path = new Path("hdfs://file");
    OneBlockInfo[] blocks = new OneBlockInfo[numBlocks];
    for (int i = 0; i < numBlocks; ++i) {
        blocks[i] = new OneBlockInfo(path, i * blockSize, blockSize, locations, racks);
        totLength += blockSize;
    }
    List<InputSplit> splits = new ArrayList<InputSplit>();
    HashMap<String, Set<String>> rackToNodes = new HashMap<String, Set<String>>();
    HashMap<String, List<OneBlockInfo>> rackToBlocks = new HashMap<String, List<OneBlockInfo>>();
    HashMap<OneBlockInfo, String[]> blockToNodes = new HashMap<OneBlockInfo, String[]>();
    HashMap<String, Set<OneBlockInfo>> nodeToBlocks = new HashMap<String, Set<OneBlockInfo>>();
    OneFileInfo.populateBlockInfo(blocks, rackToBlocks, blockToNodes, nodeToBlocks, rackToNodes);
    inFormat.createSplits(nodeToBlocks, blockToNodes, rackToBlocks, totLength, maxSize, minSizeNode, minSizeRack, splits);
    int expectedSplitCount = (int) (totLength / maxSize);
    assertEquals(expectedSplitCount, splits.size());
    HashMultiset<String> nodeSplits = HashMultiset.create();
    for (int i = 0; i < expectedSplitCount; ++i) {
        InputSplit inSplit = splits.get(i);
        assertEquals(maxSize, inSplit.getLength());
        assertEquals(1, inSplit.getLocations().length);
        nodeSplits.add(inSplit.getLocations()[0]);
    }
    assertEquals(3, nodeSplits.count(locations[0]));
    assertEquals(3, nodeSplits.count(locations[1]));
}
Also used : Path(org.apache.hadoop.fs.Path) Set(java.util.Set) HashSet(java.util.HashSet) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) OneBlockInfo(org.apache.hadoop.mapreduce.lib.input.CombineFileInputFormat.OneBlockInfo) List(java.util.List) InputSplit(org.apache.hadoop.mapreduce.InputSplit) Test(org.junit.Test)
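
The final assertions use Guava's HashMultiset to count how many splits were placed on each host. The same tally with only the JDK, as a sketch that reuses the test's splits list (exception handling elided, as in the test method's throws clause):

import java.util.HashMap;
import java.util.Map;

// Plain-JDK stand-in for HashMultiset: host name -> number of splits placed there.
Map<String, Integer> splitsPerHost = new HashMap<>();
for (InputSplit split : splits) {
    splitsPerHost.merge(split.getLocations()[0], 1, Integer::sum);
}
// 12 blocks of 100 bytes grouped under maxSize 200 yield 6 two-block splits,
// and the grouping should hand 3 of them to each of the two hosts.
assertEquals(3, splitsPerHost.get("h1").intValue());
assertEquals(3, splitsPerHost.get("h2").intValue());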

Example 39 with Set

Use of java.util.Set in project hadoop by apache.

The class TestCombineFileInputFormat, method testSplitPlacement.

/**
   * The test suppresses unchecked warnings in
   * {@link org.mockito.Mockito#reset}. Although calling the method is
   * bad practice, we call the method instead of splitting the test
   * (i.e. restarting MiniDFSCluster) to save time.
   */
@Test
@SuppressWarnings("unchecked")
public void testSplitPlacement() throws Exception {
    MiniDFSCluster dfs = null;
    FileSystem fileSys = null;
    try {
        /* Start 3 datanodes, one each in rack r1, r2, r3. Create five files
       * 1) file1 and file5, just after starting the datanode on r1, with 
       *    a repl factor of 1, and,
       * 2) file2, just after starting the datanode on r2, with 
       *    a repl factor of 2, and,
       * 3) file3, file4 after starting all three datanodes, with a repl 
       *    factor of 3.
       * At the end, file1 and file5 will be present only on datanode1, file2 will 
       * be present on datanode1 and datanode2, and 
       * file3 and file4 will be present on all datanodes. 
       */
        Configuration conf = new Configuration();
        conf.setBoolean("dfs.replication.considerLoad", false);
        dfs = new MiniDFSCluster.Builder(conf).racks(rack1).hosts(hosts1).build();
        dfs.waitActive();
        fileSys = dfs.getFileSystem();
        if (!fileSys.mkdirs(inDir)) {
            throw new IOException("Mkdirs failed to create " + inDir.toString());
        }
        Path file1 = new Path(dir1 + "/file1");
        writeFile(conf, file1, (short) 1, 1);
        // create another file on the same datanode
        Path file5 = new Path(dir5 + "/file5");
        writeFile(conf, file5, (short) 1, 1);
        // split it using a CombinedFile input format
        DummyInputFormat inFormat = new DummyInputFormat();
        Job job = Job.getInstance(conf);
        FileInputFormat.setInputPaths(job, dir1 + "," + dir5);
        List<InputSplit> splits = inFormat.getSplits(job);
        System.out.println("Made splits(Test0): " + splits.size());
        for (InputSplit split : splits) {
            System.out.println("File split(Test0): " + split);
        }
        assertEquals(1, splits.size());
        CombineFileSplit fileSplit = (CombineFileSplit) splits.get(0);
        assertEquals(2, fileSplit.getNumPaths());
        assertEquals(1, fileSplit.getLocations().length);
        assertEquals(file1.getName(), fileSplit.getPath(0).getName());
        assertEquals(0, fileSplit.getOffset(0));
        assertEquals(BLOCKSIZE, fileSplit.getLength(0));
        assertEquals(file5.getName(), fileSplit.getPath(1).getName());
        assertEquals(0, fileSplit.getOffset(1));
        assertEquals(BLOCKSIZE, fileSplit.getLength(1));
        assertEquals(hosts1[0], fileSplit.getLocations()[0]);
        dfs.startDataNodes(conf, 1, true, null, rack2, hosts2, null);
        dfs.waitActive();
        // create file on two datanodes.
        Path file2 = new Path(dir2 + "/file2");
        writeFile(conf, file2, (short) 2, 2);
        // split it using a CombinedFile input format
        inFormat = new DummyInputFormat();
        FileInputFormat.setInputPaths(job, dir1 + "," + dir2);
        inFormat.setMinSplitSizeRack(BLOCKSIZE);
        splits = inFormat.getSplits(job);
        System.out.println("Made splits(Test1): " + splits.size());
        for (InputSplit split : splits) {
            System.out.println("File split(Test1): " + split);
        }
        for (InputSplit split : splits) {
            fileSplit = (CombineFileSplit) split;
            /**
         * If rack1 is processed first by
         * {@link CombineFileInputFormat#createSplits},
         * create only one split on rack1. Otherwise create two splits.
         */
            if (splits.size() == 2) {
                // first split is on rack2, contains file2
                if (split.equals(splits.get(0))) {
                    assertEquals(2, fileSplit.getNumPaths());
                    assertEquals(1, fileSplit.getLocations().length);
                    assertEquals(file2.getName(), fileSplit.getPath(0).getName());
                    assertEquals(0, fileSplit.getOffset(0));
                    assertEquals(BLOCKSIZE, fileSplit.getLength(0));
                    assertEquals(file2.getName(), fileSplit.getPath(1).getName());
                    assertEquals(BLOCKSIZE, fileSplit.getOffset(1));
                    assertEquals(BLOCKSIZE, fileSplit.getLength(1));
                    assertEquals(hosts2[0], fileSplit.getLocations()[0]);
                }
                // second split is on rack1, contains file1
                if (split.equals(splits.get(1))) {
                    assertEquals(1, fileSplit.getNumPaths());
                    assertEquals(1, fileSplit.getLocations().length);
                    assertEquals(file1.getName(), fileSplit.getPath(0).getName());
                    assertEquals(0, fileSplit.getOffset(0));
                    assertEquals(BLOCKSIZE, fileSplit.getLength(0));
                    assertEquals(hosts1[0], fileSplit.getLocations()[0]);
                }
            } else if (splits.size() == 1) {
                // first split is on rack1, contains file1 and file2.
                assertEquals(3, fileSplit.getNumPaths());
                Set<Split> expected = new HashSet<>();
                expected.add(new Split(file1.getName(), BLOCKSIZE, 0));
                expected.add(new Split(file2.getName(), BLOCKSIZE, 0));
                expected.add(new Split(file2.getName(), BLOCKSIZE, BLOCKSIZE));
                List<Split> actual = new ArrayList<>();
                for (int i = 0; i < 3; i++) {
                    String name = fileSplit.getPath(i).getName();
                    long length = fileSplit.getLength(i);
                    long offset = fileSplit.getOffset(i);
                    actual.add(new Split(name, length, offset));
                }
                assertTrue(actual.containsAll(expected));
                assertEquals(1, fileSplit.getLocations().length);
                assertEquals(hosts1[0], fileSplit.getLocations()[0]);
            } else {
                fail("Expected split size is 1 or 2, but actual size is " + splits.size());
            }
        }
        // create another file on 3 datanodes and 3 racks.
        dfs.startDataNodes(conf, 1, true, null, rack3, hosts3, null);
        dfs.waitActive();
        Path file3 = new Path(dir3 + "/file3");
        writeFile(conf, file3, (short) 3, 3);
        inFormat = new DummyInputFormat();
        FileInputFormat.setInputPaths(job, dir1 + "," + dir2 + "," + dir3);
        inFormat.setMinSplitSizeRack(BLOCKSIZE);
        splits = inFormat.getSplits(job);
        for (InputSplit split : splits) {
            System.out.println("File split(Test2): " + split);
        }
        Set<Split> expected = new HashSet<>();
        expected.add(new Split(file1.getName(), BLOCKSIZE, 0));
        expected.add(new Split(file2.getName(), BLOCKSIZE, 0));
        expected.add(new Split(file2.getName(), BLOCKSIZE, BLOCKSIZE));
        expected.add(new Split(file3.getName(), BLOCKSIZE, 0));
        expected.add(new Split(file3.getName(), BLOCKSIZE, BLOCKSIZE));
        expected.add(new Split(file3.getName(), BLOCKSIZE, BLOCKSIZE * 2));
        List<Split> actual = new ArrayList<>();
        for (InputSplit split : splits) {
            fileSplit = (CombineFileSplit) split;
            /**
         * If rack1 is processed first by
         * {@link CombineFileInputFormat#createSplits},
         * create only one split on rack1.
          * If rack2 or rack3 is processed first and rack1 is processed second,
          * create one split on rack2 or rack3 and the other on rack1.
          * Otherwise create three splits, one for each rack.
         */
            if (splits.size() == 3) {
                // first split is on rack3, contains file3
                if (split.equals(splits.get(0))) {
                    assertEquals(3, fileSplit.getNumPaths());
                    assertEquals(1, fileSplit.getLocations().length);
                    assertEquals(file3.getName(), fileSplit.getPath(0).getName());
                    assertEquals(0, fileSplit.getOffset(0));
                    assertEquals(BLOCKSIZE, fileSplit.getLength(0));
                    assertEquals(file3.getName(), fileSplit.getPath(1).getName());
                    assertEquals(BLOCKSIZE, fileSplit.getOffset(1));
                    assertEquals(BLOCKSIZE, fileSplit.getLength(1));
                    assertEquals(file3.getName(), fileSplit.getPath(2).getName());
                    assertEquals(2 * BLOCKSIZE, fileSplit.getOffset(2));
                    assertEquals(BLOCKSIZE, fileSplit.getLength(2));
                    assertEquals(hosts3[0], fileSplit.getLocations()[0]);
                }
                // second split is on rack2, contains file2
                if (split.equals(splits.get(1))) {
                    assertEquals(2, fileSplit.getNumPaths());
                    assertEquals(1, fileSplit.getLocations().length);
                    assertEquals(file2.getName(), fileSplit.getPath(0).getName());
                    assertEquals(0, fileSplit.getOffset(0));
                    assertEquals(BLOCKSIZE, fileSplit.getLength(0));
                    assertEquals(file2.getName(), fileSplit.getPath(1).getName());
                    assertEquals(BLOCKSIZE, fileSplit.getOffset(1));
                    assertEquals(BLOCKSIZE, fileSplit.getLength(1));
                    assertEquals(hosts2[0], fileSplit.getLocations()[0]);
                }
                // third split is on rack1, contains file1
                if (split.equals(splits.get(2))) {
                    assertEquals(1, fileSplit.getNumPaths());
                    assertEquals(1, fileSplit.getLocations().length);
                    assertEquals(file1.getName(), fileSplit.getPath(0).getName());
                    assertEquals(0, fileSplit.getOffset(0));
                    assertEquals(BLOCKSIZE, fileSplit.getLength(0));
                    assertEquals(hosts1[0], fileSplit.getLocations()[0]);
                }
            } else if (splits.size() == 2) {
                // first split is on rack2 or rack3, contains one or two files.
                if (split.equals(splits.get(0))) {
                    assertEquals(1, fileSplit.getLocations().length);
                    if (fileSplit.getLocations()[0].equals(hosts2[0])) {
                        assertEquals(2, fileSplit.getNumPaths());
                    } else if (fileSplit.getLocations()[0].equals(hosts3[0])) {
                        assertEquals(3, fileSplit.getNumPaths());
                    } else {
                        fail("First split should be on rack2 or rack3.");
                    }
                }
                // second split is on rack1, contains the remaining files.
                if (split.equals(splits.get(1))) {
                    assertEquals(1, fileSplit.getLocations().length);
                    assertEquals(hosts1[0], fileSplit.getLocations()[0]);
                }
            } else if (splits.size() == 1) {
                // the only split is on rack1 and contains all three files.
                assertEquals(1, fileSplit.getLocations().length);
                assertEquals(6, fileSplit.getNumPaths());
                assertEquals(hosts1[0], fileSplit.getLocations()[0]);
            } else {
                fail("Split size should be 1, 2, or 3.");
            }
            for (int i = 0; i < fileSplit.getNumPaths(); i++) {
                String name = fileSplit.getPath(i).getName();
                long length = fileSplit.getLength(i);
                long offset = fileSplit.getOffset(i);
                actual.add(new Split(name, length, offset));
            }
        }
        assertEquals(6, actual.size());
        assertTrue(actual.containsAll(expected));
        // create file4 on all three racks
        Path file4 = new Path(dir4 + "/file4");
        writeFile(conf, file4, (short) 3, 3);
        inFormat = new DummyInputFormat();
        FileInputFormat.setInputPaths(job, dir1 + "," + dir2 + "," + dir3 + "," + dir4);
        inFormat.setMinSplitSizeRack(BLOCKSIZE);
        splits = inFormat.getSplits(job);
        for (InputSplit split : splits) {
            System.out.println("File split(Test3): " + split);
        }
        expected.add(new Split(file4.getName(), BLOCKSIZE, 0));
        expected.add(new Split(file4.getName(), BLOCKSIZE, BLOCKSIZE));
        expected.add(new Split(file4.getName(), BLOCKSIZE, BLOCKSIZE * 2));
        actual.clear();
        for (InputSplit split : splits) {
            fileSplit = (CombineFileSplit) split;
            /**
         * If rack1 is processed first by
         * {@link CombineFileInputFormat#createSplits},
         * create only one split on rack1.
          * If rack2 or rack3 is processed first and rack1 is processed second,
          * create one split on rack2 or rack3 and the other on rack1.
          * Otherwise create three splits, one for each rack.
         */
            if (splits.size() == 3) {
                // first split is on rack3, contains file3 and file4
                if (split.equals(splits.get(0))) {
                    assertEquals(6, fileSplit.getNumPaths());
                    assertEquals(1, fileSplit.getLocations().length);
                    assertEquals(hosts3[0], fileSplit.getLocations()[0]);
                }
                // second split is on rack2, contains file2
                if (split.equals(splits.get(1))) {
                    assertEquals(2, fileSplit.getNumPaths());
                    assertEquals(1, fileSplit.getLocations().length);
                    assertEquals(file2.getName(), fileSplit.getPath(0).getName());
                    assertEquals(0, fileSplit.getOffset(0));
                    assertEquals(BLOCKSIZE, fileSplit.getLength(0));
                    assertEquals(file2.getName(), fileSplit.getPath(1).getName());
                    assertEquals(BLOCKSIZE, fileSplit.getOffset(1));
                    assertEquals(BLOCKSIZE, fileSplit.getLength(1));
                    assertEquals(hosts2[0], fileSplit.getLocations()[0]);
                }
                // third split is on rack1, contains file1
                if (split.equals(splits.get(2))) {
                    assertEquals(1, fileSplit.getNumPaths());
                    assertEquals(1, fileSplit.getLocations().length);
                    assertEquals(file1.getName(), fileSplit.getPath(0).getName());
                    assertEquals(0, fileSplit.getOffset(0));
                    assertEquals(BLOCKSIZE, fileSplit.getLength(0));
                    assertEquals(hosts1[0], fileSplit.getLocations()[0]);
                }
            } else if (splits.size() == 2) {
                // first split is on rack2 or rack3, contains two or three files.
                if (split.equals(splits.get(0))) {
                    assertEquals(1, fileSplit.getLocations().length);
                    if (fileSplit.getLocations()[0].equals(hosts2[0])) {
                        assertEquals(5, fileSplit.getNumPaths());
                    } else if (fileSplit.getLocations()[0].equals(hosts3[0])) {
                        assertEquals(6, fileSplit.getNumPaths());
                    } else {
                        fail("First split should be on rack2 or rack3.");
                    }
                }
                // second split is on rack1, contains the remaining files.
                if (split.equals(splits.get(1))) {
                    assertEquals(1, fileSplit.getLocations().length);
                    assertEquals(hosts1[0], fileSplit.getLocations()[0]);
                }
            } else if (splits.size() == 1) {
                // the only split is on rack1 and contains all four files.
                assertEquals(1, fileSplit.getLocations().length);
                assertEquals(9, fileSplit.getNumPaths());
                assertEquals(hosts1[0], fileSplit.getLocations()[0]);
            } else {
                fail("Split size should be 1, 2, or 3.");
            }
            for (int i = 0; i < fileSplit.getNumPaths(); i++) {
                String name = fileSplit.getPath(i).getName();
                long length = fileSplit.getLength(i);
                long offset = fileSplit.getOffset(i);
                actual.add(new Split(name, length, offset));
            }
        }
        assertEquals(9, actual.size());
        assertTrue(actual.containsAll(expected));
        // maximum split size is 2 blocks 
        inFormat = new DummyInputFormat();
        inFormat.setMinSplitSizeNode(BLOCKSIZE);
        inFormat.setMaxSplitSize(2 * BLOCKSIZE);
        FileInputFormat.setInputPaths(job, dir1 + "," + dir2 + "," + dir3 + "," + dir4);
        splits = inFormat.getSplits(job);
        for (InputSplit split : splits) {
            System.out.println("File split(Test4): " + split);
        }
        assertEquals(5, splits.size());
        actual.clear();
        reset(mockList);
        for (InputSplit split : splits) {
            fileSplit = (CombineFileSplit) split;
            for (int i = 0; i < fileSplit.getNumPaths(); i++) {
                String name = fileSplit.getPath(i).getName();
                long length = fileSplit.getLength(i);
                long offset = fileSplit.getOffset(i);
                actual.add(new Split(name, length, offset));
            }
            mockList.add(fileSplit.getLocations()[0]);
        }
        assertEquals(9, actual.size());
        assertTrue(actual.containsAll(expected));
        // verify the splits are on all the racks
        verify(mockList, atLeastOnce()).add(hosts1[0]);
        verify(mockList, atLeastOnce()).add(hosts2[0]);
        verify(mockList, atLeastOnce()).add(hosts3[0]);
        // maximum split size is 3 blocks 
        inFormat = new DummyInputFormat();
        inFormat.setMinSplitSizeNode(BLOCKSIZE);
        inFormat.setMaxSplitSize(3 * BLOCKSIZE);
        FileInputFormat.setInputPaths(job, dir1 + "," + dir2 + "," + dir3 + "," + dir4);
        splits = inFormat.getSplits(job);
        for (InputSplit split : splits) {
            System.out.println("File split(Test5): " + split);
        }
        assertEquals(3, splits.size());
        actual.clear();
        reset(mockList);
        for (InputSplit split : splits) {
            fileSplit = (CombineFileSplit) split;
            for (int i = 0; i < fileSplit.getNumPaths(); i++) {
                String name = fileSplit.getPath(i).getName();
                long length = fileSplit.getLength(i);
                long offset = fileSplit.getOffset(i);
                actual.add(new Split(name, length, offset));
            }
            mockList.add(fileSplit.getLocations()[0]);
        }
        assertEquals(9, actual.size());
        assertTrue(actual.containsAll(expected));
        verify(mockList, atLeastOnce()).add(hosts1[0]);
        verify(mockList, atLeastOnce()).add(hosts2[0]);
        // maximum split size is 4 blocks 
        inFormat = new DummyInputFormat();
        inFormat.setMaxSplitSize(4 * BLOCKSIZE);
        FileInputFormat.setInputPaths(job, dir1 + "," + dir2 + "," + dir3 + "," + dir4);
        splits = inFormat.getSplits(job);
        for (InputSplit split : splits) {
            System.out.println("File split(Test6): " + split);
        }
        assertEquals(3, splits.size());
        actual.clear();
        reset(mockList);
        for (InputSplit split : splits) {
            fileSplit = (CombineFileSplit) split;
            for (int i = 0; i < fileSplit.getNumPaths(); i++) {
                String name = fileSplit.getPath(i).getName();
                long length = fileSplit.getLength(i);
                long offset = fileSplit.getOffset(i);
                actual.add(new Split(name, length, offset));
            }
            mockList.add(fileSplit.getLocations()[0]);
        }
        assertEquals(9, actual.size());
        assertTrue(actual.containsAll(expected));
        verify(mockList, atLeastOnce()).add(hosts1[0]);
        // maximum split size is 7 blocks and min is 3 blocks
        inFormat = new DummyInputFormat();
        inFormat.setMaxSplitSize(7 * BLOCKSIZE);
        inFormat.setMinSplitSizeNode(3 * BLOCKSIZE);
        inFormat.setMinSplitSizeRack(3 * BLOCKSIZE);
        FileInputFormat.setInputPaths(job, dir1 + "," + dir2 + "," + dir3 + "," + dir4);
        splits = inFormat.getSplits(job);
        for (InputSplit split : splits) {
            System.out.println("File split(Test7): " + split);
        }
        assertEquals(2, splits.size());
        actual.clear();
        reset(mockList);
        for (InputSplit split : splits) {
            fileSplit = (CombineFileSplit) split;
            for (int i = 0; i < fileSplit.getNumPaths(); i++) {
                String name = fileSplit.getPath(i).getName();
                long length = fileSplit.getLength(i);
                long offset = fileSplit.getOffset(i);
                actual.add(new Split(name, length, offset));
            }
            mockList.add(fileSplit.getLocations()[0]);
        }
        assertEquals(9, actual.size());
        assertTrue(actual.containsAll(expected));
        verify(mockList, atLeastOnce()).add(hosts1[0]);
        // Rack 1 has file1, file2, file3 and file4
        // Rack 2 has file2, file3 and file4
        // Rack 3 has file3 and file4
        // set up a filter so that only (file1 and file2) or (file3 and file4)
        // can be combined
        inFormat = new DummyInputFormat();
        FileInputFormat.addInputPath(job, inDir);
        // everything is at least rack local
        inFormat.setMinSplitSizeRack(1);
        inFormat.createPool(new TestFilter(dir1), new TestFilter(dir2));
        splits = inFormat.getSplits(job);
        for (InputSplit split : splits) {
            System.out.println("File split(Test1): " + split);
        }
        for (InputSplit split : splits) {
            fileSplit = (CombineFileSplit) split;
            if (splits.size() == 2) {
                // first split is on rack1, contains file1 and file2.
                if (split.equals(splits.get(0))) {
                    assertEquals(3, fileSplit.getNumPaths());
                    expected.clear();
                    expected.add(new Split(file1.getName(), BLOCKSIZE, 0));
                    expected.add(new Split(file2.getName(), BLOCKSIZE, 0));
                    expected.add(new Split(file2.getName(), BLOCKSIZE, BLOCKSIZE));
                    actual.clear();
                    for (int i = 0; i < 3; i++) {
                        String name = fileSplit.getPath(i).getName();
                        long length = fileSplit.getLength(i);
                        long offset = fileSplit.getOffset(i);
                        actual.add(new Split(name, length, offset));
                    }
                    assertTrue(actual.containsAll(expected));
                    assertEquals(1, fileSplit.getLocations().length);
                    assertEquals(hosts1[0], fileSplit.getLocations()[0]);
                }
                if (split.equals(splits.get(1))) {
                    // the second split contains file3 and file4; however,
                    // its location is undetermined.
                    assertEquals(6, fileSplit.getNumPaths());
                    expected.clear();
                    expected.add(new Split(file3.getName(), BLOCKSIZE, 0));
                    expected.add(new Split(file3.getName(), BLOCKSIZE, BLOCKSIZE));
                    expected.add(new Split(file3.getName(), BLOCKSIZE, BLOCKSIZE * 2));
                    expected.add(new Split(file4.getName(), BLOCKSIZE, 0));
                    expected.add(new Split(file4.getName(), BLOCKSIZE, BLOCKSIZE));
                    expected.add(new Split(file4.getName(), BLOCKSIZE, BLOCKSIZE * 2));
                    actual.clear();
                    for (int i = 0; i < 6; i++) {
                        String name = fileSplit.getPath(i).getName();
                        long length = fileSplit.getLength(i);
                        long offset = fileSplit.getOffset(i);
                        actual.add(new Split(name, length, offset));
                    }
                    assertTrue(actual.containsAll(expected));
                    assertEquals(1, fileSplit.getLocations().length);
                }
            } else if (splits.size() == 3) {
                if (split.equals(splits.get(0))) {
                    // first split is on rack2, contains file2
                    assertEquals(2, fileSplit.getNumPaths());
                    expected.clear();
                    expected.add(new Split(file2.getName(), BLOCKSIZE, 0));
                    expected.add(new Split(file2.getName(), BLOCKSIZE, BLOCKSIZE));
                    actual.clear();
                    for (int i = 0; i < 2; i++) {
                        String name = fileSplit.getPath(i).getName();
                        long length = fileSplit.getLength(i);
                        long offset = fileSplit.getOffset(i);
                        actual.add(new Split(name, length, offset));
                    }
                    assertTrue(actual.containsAll(expected));
                    assertEquals(1, fileSplit.getLocations().length);
                    assertEquals(hosts2[0], fileSplit.getLocations()[0]);
                }
                if (split.equals(splits.get(1))) {
                    // second split is on rack1, contains file1
                    assertEquals(1, fileSplit.getNumPaths());
                    assertEquals(file1.getName(), fileSplit.getPath(0).getName());
                    assertEquals(BLOCKSIZE, fileSplit.getLength(0));
                    assertEquals(0, fileSplit.getOffset(0));
                    assertEquals(1, fileSplit.getLocations().length);
                    assertEquals(hosts1[0], fileSplit.getLocations()[0]);
                }
                if (split.equals(splits.get(2))) {
                    // the third split contains file3 and file4; however,
                    // its location is undetermined.
                    assertEquals(6, fileSplit.getNumPaths());
                    expected.clear();
                    expected.add(new Split(file3.getName(), BLOCKSIZE, 0));
                    expected.add(new Split(file3.getName(), BLOCKSIZE, BLOCKSIZE));
                    expected.add(new Split(file3.getName(), BLOCKSIZE, BLOCKSIZE * 2));
                    expected.add(new Split(file4.getName(), BLOCKSIZE, 0));
                    expected.add(new Split(file4.getName(), BLOCKSIZE, BLOCKSIZE));
                    expected.add(new Split(file4.getName(), BLOCKSIZE, BLOCKSIZE * 2));
                    actual.clear();
                    for (int i = 0; i < 6; i++) {
                        String name = fileSplit.getPath(i).getName();
                        long length = fileSplit.getLength(i);
                        long offset = fileSplit.getOffset(i);
                        actual.add(new Split(name, length, offset));
                    }
                    assertTrue(actual.containsAll(expected));
                    assertEquals(1, fileSplit.getLocations().length);
                }
            } else {
                fail("Split size should be 2 or 3.");
            }
        }
        // measure performance when there are multiple pools and
        // many files in each pool.
        int numPools = 100;
        int numFiles = 1000;
        DummyInputFormat1 inFormat1 = new DummyInputFormat1();
        for (int i = 0; i < numFiles; i++) {
            FileInputFormat.setInputPaths(job, file1);
        }
        // everything is at least rack local
        inFormat1.setMinSplitSizeRack(1);
        final Path dirNoMatch1 = new Path(inDir, "/dirxx");
        final Path dirNoMatch2 = new Path(inDir, "/diryy");
        for (int i = 0; i < numPools; i++) {
            inFormat1.createPool(new TestFilter(dirNoMatch1), new TestFilter(dirNoMatch2));
        }
        long start = System.currentTimeMillis();
        splits = inFormat1.getSplits(job);
        long end = System.currentTimeMillis();
        System.out.println("Elapsed time for " + numPools + " pools " + " and " + numFiles + " files is " + ((end - start) / 1000) + " seconds.");
        // This file has three whole blocks. If the maxsplit size is
        // half the block size, then there should be six splits.
        inFormat = new DummyInputFormat();
        inFormat.setMaxSplitSize(BLOCKSIZE / 2);
        FileInputFormat.setInputPaths(job, dir3);
        splits = inFormat.getSplits(job);
        for (InputSplit split : splits) {
            System.out.println("File split(Test8): " + split);
        }
        assertEquals(6, splits.size());
    } finally {
        if (dfs != null) {
            dfs.shutdown();
        }
    }
}
Also used : Set(java.util.Set) HashSet(java.util.HashSet) Configuration(org.apache.hadoop.conf.Configuration) ArrayList(java.util.ArrayList) FileSystem(org.apache.hadoop.fs.FileSystem) DistributedFileSystem(org.apache.hadoop.hdfs.DistributedFileSystem) List(java.util.List) Job(org.apache.hadoop.mapreduce.Job) InputSplit(org.apache.hadoop.mapreduce.InputSplit) Path(org.apache.hadoop.fs.Path) MiniDFSCluster(org.apache.hadoop.hdfs.MiniDFSCluster) IOException(java.io.IOException) Test(org.junit.Test)
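
The test repeatedly collects (name, length, offset) triples into Split objects and compares them with containsAll, which only works if Split defines value-based equals and hashCode. A minimal reconstruction of such a helper (the actual class in TestCombineFileInputFormat may differ):

import java.util.Objects;

// Hypothetical value object for one (file name, length, offset) triple.
static final class Split {
    private final String name;
    private final long length;
    private final long offset;

    Split(String name, long length, long offset) {
        this.name = name;
        this.length = length;
        this.offset = offset;
    }

    @Override
    public boolean equals(Object o) {
        if (!(o instanceof Split)) {
            return false;
        }
        Split other = (Split) o;
        // Value equality is what makes containsAll meaningful in the assertions.
        return offset == other.offset
                && length == other.length
                && Objects.equals(name, other.name);
    }

    @Override
    public int hashCode() {
        return Objects.hash(name, length, offset);
    }
}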

Example 40 with Set

Use of java.util.Set in project hadoop by apache.

The class TestCombineFileInputFormat, method testNodeDistribution.

@Test
public void testNodeDistribution() throws IOException, InterruptedException {
    DummyInputFormat inFormat = new DummyInputFormat();
    int numBlocks = 60;
    long totLength = 0;
    long blockSize = 100;
    int numNodes = 10;
    long minSizeNode = 50;
    long minSizeRack = 50;
    // 2 blocks per split.
    int maxSplitSize = 200;
    String[] locations = new String[numNodes];
    for (int i = 0; i < numNodes; i++) {
        locations[i] = "h" + i;
    }
    String[] racks = new String[0];
    Path path = new Path("hdfs://file");
    OneBlockInfo[] blocks = new OneBlockInfo[numBlocks];
    int hostCountBase = 0;
    // Generate block list. Replication 3 per block.
    for (int i = 0; i < numBlocks; i++) {
        int localHostCount = hostCountBase;
        String[] blockHosts = new String[3];
        for (int j = 0; j < 3; j++) {
            int hostNum = localHostCount % numNodes;
            blockHosts[j] = "h" + hostNum;
            localHostCount++;
        }
        hostCountBase++;
        blocks[i] = new OneBlockInfo(path, i * blockSize, blockSize, blockHosts, racks);
        totLength += blockSize;
    }
    List<InputSplit> splits = new ArrayList<InputSplit>();
    HashMap<String, Set<String>> rackToNodes = new HashMap<String, Set<String>>();
    HashMap<String, List<OneBlockInfo>> rackToBlocks = new HashMap<String, List<OneBlockInfo>>();
    HashMap<OneBlockInfo, String[]> blockToNodes = new HashMap<OneBlockInfo, String[]>();
    Map<String, Set<OneBlockInfo>> nodeToBlocks = new TreeMap<String, Set<OneBlockInfo>>();
    OneFileInfo.populateBlockInfo(blocks, rackToBlocks, blockToNodes, nodeToBlocks, rackToNodes);
    inFormat.createSplits(nodeToBlocks, blockToNodes, rackToBlocks, totLength, maxSplitSize, minSizeNode, minSizeRack, splits);
    int expectedSplitCount = (int) (totLength / maxSplitSize);
    assertEquals(expectedSplitCount, splits.size());
    // Ensure 90+% of the splits have node local blocks.
    // 100% locality may not always be achieved.
    int numLocalSplits = 0;
    for (InputSplit inputSplit : splits) {
        assertEquals(maxSplitSize, inputSplit.getLength());
        if (inputSplit.getLocations().length == 1) {
            numLocalSplits++;
        }
    }
    assertTrue(numLocalSplits >= 0.9 * splits.size());
}
Also used : Path(org.apache.hadoop.fs.Path) Set(java.util.Set) HashSet(java.util.HashSet) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) OneBlockInfo(org.apache.hadoop.mapreduce.lib.input.CombineFileInputFormat.OneBlockInfo) TreeMap(java.util.TreeMap) List(java.util.List) InputSplit(org.apache.hadoop.mapreduce.InputSplit) Test(org.junit.Test)
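
The block-generation loop rotates the three replica hosts by one node per block, so the 180 replicas (60 blocks x 3) spread evenly, 18 per node. The arithmetic the two assertions encode, restated as a sketch with the test's values:

// 60 blocks of 100 bytes, grouped under a 200-byte maximum split size.
long totLength = 60 * 100L;
long maxSplitSize = 200;                                      // 2 blocks per split
int expectedSplitCount = (int) (totLength / maxSplitSize);    // = 30 splits
int minNodeLocal = (int) Math.ceil(0.9 * expectedSplitCount); // >= 27 splits
// The test passes when at least 27 of the 30 splits report exactly one
// (node-local) location.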
