use of org.apache.hadoop.mapreduce.lib.input.CombineFileInputFormat.OneBlockInfo in project hadoop by apache.
the class TestCombineFileInputFormat method testNodeInputSplit.
@Test
public void testNodeInputSplit() throws IOException, InterruptedException {
// Regression test for MAPREDUCE-4892. There are 2 nodes with all blocks on
// both nodes. The grouping ensures that both nodes get splits instead of
// just the first node
DummyInputFormat inFormat = new DummyInputFormat();
int numBlocks = 12;
long totLength = 0;
long blockSize = 100;
long maxSize = 200;
long minSizeNode = 50;
long minSizeRack = 50;
String[] locations = { "h1", "h2" };
String[] racks = new String[0];
Path path = new Path("hdfs://file");
OneBlockInfo[] blocks = new OneBlockInfo[numBlocks];
for (int i = 0; i < numBlocks; ++i) {
blocks[i] = new OneBlockInfo(path, i * blockSize, blockSize, locations, racks);
totLength += blockSize;
}
List<InputSplit> splits = new ArrayList<InputSplit>();
HashMap<String, Set<String>> rackToNodes = new HashMap<String, Set<String>>();
HashMap<String, List<OneBlockInfo>> rackToBlocks = new HashMap<String, List<OneBlockInfo>>();
HashMap<OneBlockInfo, String[]> blockToNodes = new HashMap<OneBlockInfo, String[]>();
HashMap<String, Set<OneBlockInfo>> nodeToBlocks = new HashMap<String, Set<OneBlockInfo>>();
OneFileInfo.populateBlockInfo(blocks, rackToBlocks, blockToNodes, nodeToBlocks, rackToNodes);
inFormat.createSplits(nodeToBlocks, blockToNodes, rackToBlocks, totLength, maxSize, minSizeNode, minSizeRack, splits);
int expectedSplitCount = (int) (totLength / maxSize);
assertEquals(expectedSplitCount, splits.size());
HashMultiset<String> nodeSplits = HashMultiset.create();
for (int i = 0; i < expectedSplitCount; ++i) {
InputSplit inSplit = splits.get(i);
assertEquals(maxSize, inSplit.getLength());
assertEquals(1, inSplit.getLocations().length);
nodeSplits.add(inSplit.getLocations()[0]);
}
assertEquals(3, nodeSplits.count(locations[0]));
assertEquals(3, nodeSplits.count(locations[1]));
}
use of org.apache.hadoop.mapreduce.lib.input.CombineFileInputFormat.OneBlockInfo in project hadoop by apache.
the class TestCombineFileInputFormat method testNodeDistribution.
@Test
public void testNodeDistribution() throws IOException, InterruptedException {
DummyInputFormat inFormat = new DummyInputFormat();
int numBlocks = 60;
long totLength = 0;
long blockSize = 100;
int numNodes = 10;
long minSizeNode = 50;
long minSizeRack = 50;
// 4 blocks per split.
int maxSplitSize = 200;
String[] locations = new String[numNodes];
for (int i = 0; i < numNodes; i++) {
locations[i] = "h" + i;
}
String[] racks = new String[0];
Path path = new Path("hdfs://file");
OneBlockInfo[] blocks = new OneBlockInfo[numBlocks];
int hostCountBase = 0;
// Generate block list. Replication 3 per block.
for (int i = 0; i < numBlocks; i++) {
int localHostCount = hostCountBase;
String[] blockHosts = new String[3];
for (int j = 0; j < 3; j++) {
int hostNum = localHostCount % numNodes;
blockHosts[j] = "h" + hostNum;
localHostCount++;
}
hostCountBase++;
blocks[i] = new OneBlockInfo(path, i * blockSize, blockSize, blockHosts, racks);
totLength += blockSize;
}
List<InputSplit> splits = new ArrayList<InputSplit>();
HashMap<String, Set<String>> rackToNodes = new HashMap<String, Set<String>>();
HashMap<String, List<OneBlockInfo>> rackToBlocks = new HashMap<String, List<OneBlockInfo>>();
HashMap<OneBlockInfo, String[]> blockToNodes = new HashMap<OneBlockInfo, String[]>();
Map<String, Set<OneBlockInfo>> nodeToBlocks = new TreeMap<String, Set<OneBlockInfo>>();
OneFileInfo.populateBlockInfo(blocks, rackToBlocks, blockToNodes, nodeToBlocks, rackToNodes);
inFormat.createSplits(nodeToBlocks, blockToNodes, rackToBlocks, totLength, maxSplitSize, minSizeNode, minSizeRack, splits);
int expectedSplitCount = (int) (totLength / maxSplitSize);
assertEquals(expectedSplitCount, splits.size());
// Ensure 90+% of the splits have node local blocks.
// 100% locality may not always be achieved.
int numLocalSplits = 0;
for (InputSplit inputSplit : splits) {
assertEquals(maxSplitSize, inputSplit.getLength());
if (inputSplit.getLocations().length == 1) {
numLocalSplits++;
}
}
assertTrue(numLocalSplits >= 0.9 * splits.size());
}
Aggregations