Use of java.util.BitSet in project hadoop by apache.
The class TestReconstructStripedBlocks, method testCountLiveReplicas.
/**
 * Make sure the NN can detect the scenario where there is a sufficient
 * number of internal blocks (>= 9 by default) but a data/parity block is
 * still missing.
 */
@Test
public void testCountLiveReplicas() throws Exception {
    final HdfsConfiguration conf = new HdfsConfiguration();
    conf.setInt(DFSConfigKeys.DFS_NAMENODE_REDUNDANCY_INTERVAL_SECONDS_KEY, 1);
    conf.setBoolean(DFSConfigKeys.DFS_NAMENODE_REDUNDANCY_CONSIDERLOAD_KEY, false);
    conf.set(DFSConfigKeys.DFS_NAMENODE_EC_POLICIES_ENABLED_KEY, StripedFileTestUtil.getDefaultECPolicy().getName());
    cluster = new MiniDFSCluster.Builder(conf).numDataNodes(groupSize + 2).build();
    cluster.waitActive();
    DistributedFileSystem fs = cluster.getFileSystem();
    try {
        fs.mkdirs(dirPath);
        fs.setErasureCodingPolicy(dirPath, StripedFileTestUtil.getDefaultECPolicy().getName());
        DFSTestUtil.createFile(fs, filePath, cellSize * dataBlocks * 2, (short) 1, 0L);
        // stop a dn
        LocatedBlocks blks = fs.getClient().getLocatedBlocks(filePath.toString(), 0);
        LocatedStripedBlock block = (LocatedStripedBlock) blks.getLastLocatedBlock();
        DatanodeInfo dnToStop = block.getLocations()[0];
        MiniDFSCluster.DataNodeProperties dnProp = cluster.stopDataNode(dnToStop.getXferAddr());
        cluster.setDataNodeDead(dnToStop);
        // wait for reconstruction to happen
        DFSTestUtil.waitForReplication(fs, filePath, groupSize, 15 * 1000);
        // bring the dn back: 10 internal blocks now
        cluster.restartDataNode(dnProp);
        cluster.waitActive();
        // stop another dn: 9 internal blocks remain, but they cover only 8 distinct indices
        dnToStop = block.getLocations()[1];
        cluster.stopDataNode(dnToStop.getXferAddr());
        cluster.setDataNodeDead(dnToStop);
        // the NameNode currently tracks the missing block; restart it so it must rediscover the gap
        cluster.restartNameNode(true);
        for (DataNode dn : cluster.getDataNodes()) {
            DataNodeTestUtils.triggerBlockReport(dn);
        }
        FSNamesystem fsn = cluster.getNamesystem();
        BlockManager bm = fsn.getBlockManager();
        // wait 3 running cycles of the redundancy monitor
        Thread.sleep(3000);
        for (DataNode dn : cluster.getDataNodes()) {
            DataNodeTestUtils.triggerHeartbeat(dn);
        }
        // check whether the NN can detect the missing internal block and finish
        // the reconstruction
        StripedFileTestUtil.waitForReconstructionFinished(filePath, fs, groupSize);
        boolean reconstructed = false;
        for (int i = 0; i < 5; i++) {
            NumberReplicas num = null;
            fsn.readLock();
            try {
                BlockInfo blockInfo = cluster.getNamesystem().getFSDirectory().getINode4Write(filePath.toString()).asFile().getLastBlock();
                num = bm.countNodes(blockInfo);
            } finally {
                fsn.readUnlock();
            }
            if (num.liveReplicas() >= groupSize) {
                reconstructed = true;
                break;
            } else {
                Thread.sleep(1000);
            }
        }
        Assert.assertTrue(reconstructed);
        blks = fs.getClient().getLocatedBlocks(filePath.toString(), 0);
        block = (LocatedStripedBlock) blks.getLastLocatedBlock();
        BitSet bitSet = new BitSet(groupSize);
        for (byte index : block.getBlockIndices()) {
            bitSet.set(index);
        }
        for (int i = 0; i < groupSize; i++) {
            Assert.assertTrue(bitSet.get(i));
        }
    } finally {
        cluster.shutdown();
    }
}
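The closing assertion is the heart of the BitSet usage here: after reconstruction, every internal block index of the striped group must be reported at least once. A minimal sketch of that coverage check in isolation (the groupSize value and the sample indices below are hypothetical stand-ins, not values taken from the test):

import java.util.BitSet;

public class BlockIndexCoverage {
    public static void main(String[] args) {
        // hypothetical: RS-6-3 has 6 data + 3 parity = 9 internal blocks
        final int groupSize = 9;
        byte[] reportedIndices = {0, 1, 2, 3, 4, 5, 6, 7, 8};
        BitSet seen = new BitSet(groupSize);
        for (byte index : reportedIndices) {
            seen.set(index);
        }
        // nextClearBit(0) == groupSize iff indices 0..groupSize-1 are all present
        if (seen.nextClearBit(0) < groupSize) {
            throw new AssertionError("missing internal block index " + seen.nextClearBit(0));
        }
        System.out.println("all " + groupSize + " internal block indices covered");
    }
}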
Use of java.util.BitSet in project hadoop by apache.
The class TestMultiFileInputFormat, method testFormat.
@Test
public void testFormat() throws IOException {
    LOG.info("Test started");
    LOG.info("Max split count = " + MAX_SPLIT_COUNT);
    LOG.info("Split count increment = " + SPLIT_COUNT_INCR);
    LOG.info("Max bytes per file = " + MAX_BYTES);
    LOG.info("Max number of files = " + MAX_NUM_FILES);
    LOG.info("Number of files increment = " + NUM_FILES_INCR);
    MultiFileInputFormat<Text, Text> format = new DummyMultiFileInputFormat();
    FileSystem fs = FileSystem.getLocal(job);
    for (int numFiles = 1; numFiles < MAX_NUM_FILES; numFiles += (NUM_FILES_INCR / 2) + rand.nextInt(NUM_FILES_INCR / 2)) {
        Path dir = initFiles(fs, numFiles, -1);
        BitSet bits = new BitSet(numFiles);
        for (int i = 1; i < MAX_SPLIT_COUNT; i += rand.nextInt(SPLIT_COUNT_INCR) + 1) {
            LOG.info("Running for Num Files=" + numFiles + ", split count=" + i);
            MultiFileSplit[] splits = (MultiFileSplit[]) format.getSplits(job, i);
            bits.clear();
            for (MultiFileSplit split : splits) {
                long splitLength = 0;
                for (Path p : split.getPaths()) {
                    long length = fs.getContentSummary(p).getLength();
                    assertEquals(length, lengths.get(p.getName()).longValue());
                    splitLength += length;
                    String name = p.getName();
                    int index = Integer.parseInt(name.substring(name.lastIndexOf("file_") + 5));
                    assertFalse(bits.get(index));
                    bits.set(index);
                }
                assertEquals(splitLength, split.getLength());
            }
        }
        assertEquals(bits.cardinality(), numFiles);
        fs.delete(dir, true);
    }
    LOG.info("Test Finished");
}
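TestMultiFileInputFormat pairs two BitSet calls to validate a partition: assertFalse(bits.get(index)) before bits.set(index) catches a file assigned to two splits, and bits.cardinality() == numFiles at the end catches a file assigned to none. A self-contained sketch of that idiom, using plain exceptions instead of JUnit (the file count and split assignment below are hypothetical):

import java.util.BitSet;

public class SplitCoverage {
    public static void main(String[] args) {
        final int numFiles = 8;                          // hypothetical file count
        int[][] splits = {{0, 3, 5}, {1, 2}, {4, 6, 7}}; // hypothetical split -> file indices
        BitSet bits = new BitSet(numFiles);
        for (int[] split : splits) {
            for (int index : split) {
                // a set bit means an earlier split already claimed this file
                if (bits.get(index)) {
                    throw new AssertionError("file " + index + " appears in multiple splits");
                }
                bits.set(index);
            }
        }
        // cardinality counts set bits; anything below numFiles means a file was dropped
        if (bits.cardinality() != numFiles) {
            throw new AssertionError("some files appear in no split");
        }
        System.out.println("each file appears in exactly one split");
    }
}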
Use of java.util.BitSet in project hadoop by apache.
The class TestSequenceFileAsTextInputFormat, method testFormat.
@Test
public void testFormat() throws Exception {
    JobConf job = new JobConf(conf);
    FileSystem fs = FileSystem.getLocal(conf);
    Path dir = new Path(System.getProperty("test.build.data", ".") + "/mapred");
    Path file = new Path(dir, "test.seq");
    Reporter reporter = Reporter.NULL;
    int seed = new Random().nextInt();
    //LOG.info("seed = " + seed);
    Random random = new Random(seed);
    fs.delete(dir, true);
    FileInputFormat.setInputPaths(job, dir);
    // for a variety of lengths
    for (int length = 0; length < MAX_LENGTH; length += random.nextInt(MAX_LENGTH / 10) + 1) {
        //LOG.info("creating; entries = " + length);
        // create a file with length entries
        SequenceFile.Writer writer = SequenceFile.createWriter(fs, conf, file, IntWritable.class, LongWritable.class);
        try {
            for (int i = 0; i < length; i++) {
                IntWritable key = new IntWritable(i);
                LongWritable value = new LongWritable(10 * i);
                writer.append(key, value);
            }
        } finally {
            writer.close();
        }
        // try splitting the file in a variety of sizes
        InputFormat<Text, Text> format = new SequenceFileAsTextInputFormat();
        for (int i = 0; i < 3; i++) {
            int numSplits = random.nextInt(MAX_LENGTH / (SequenceFile.SYNC_INTERVAL / 20)) + 1;
            //LOG.info("splitting: requesting = " + numSplits);
            InputSplit[] splits = format.getSplits(job, numSplits);
            //LOG.info("splitting: got = " + splits.length);
            // check each split
            BitSet bits = new BitSet(length);
            for (int j = 0; j < splits.length; j++) {
                RecordReader<Text, Text> reader = format.getRecordReader(splits[j], job, reporter);
                Class readerClass = reader.getClass();
                assertEquals("reader class is SequenceFileAsTextRecordReader.", SequenceFileAsTextRecordReader.class, readerClass);
                Text value = reader.createValue();
                Text key = reader.createKey();
                try {
                    int count = 0;
                    while (reader.next(key, value)) {
                        //if (bits.get(key.get())) {
                        //    LOG.info("splits[" + j + "]=" + splits[j] + " : " + key.get());
                        //    LOG.info("@" + reader.getPos());
                        //}
                        int keyInt = Integer.parseInt(key.toString());
                        assertFalse("Key in multiple partitions.", bits.get(keyInt));
                        bits.set(keyInt);
                        count++;
                    }
                    //LOG.info("splits[" + j + "]=" + splits[j] + " count=" + count);
                } finally {
                    reader.close();
                }
            }
            assertEquals("Some keys in no partition.", length, bits.cardinality());
        }
    }
}
Use of java.util.BitSet in project hadoop by apache.
The class TestSequenceFileInputFormat, method testFormat.
@Test
public void testFormat() throws Exception {
    JobConf job = new JobConf(conf);
    FileSystem fs = FileSystem.getLocal(conf);
    Path dir = new Path(System.getProperty("test.build.data", ".") + "/mapred");
    Path file = new Path(dir, "test.seq");
    Reporter reporter = Reporter.NULL;
    int seed = new Random().nextInt();
    //LOG.info("seed = " + seed);
    Random random = new Random(seed);
    fs.delete(dir, true);
    FileInputFormat.setInputPaths(job, dir);
    // for a variety of lengths
    for (int length = 0; length < MAX_LENGTH; length += random.nextInt(MAX_LENGTH / 10) + 1) {
        //LOG.info("creating; entries = " + length);
        // create a file with length entries
        SequenceFile.Writer writer = SequenceFile.createWriter(fs, conf, file, IntWritable.class, BytesWritable.class);
        try {
            for (int i = 0; i < length; i++) {
                IntWritable key = new IntWritable(i);
                byte[] data = new byte[random.nextInt(10)];
                random.nextBytes(data);
                BytesWritable value = new BytesWritable(data);
                writer.append(key, value);
            }
        } finally {
            writer.close();
        }
        // try splitting the file in a variety of sizes
        InputFormat<IntWritable, BytesWritable> format = new SequenceFileInputFormat<IntWritable, BytesWritable>();
        IntWritable key = new IntWritable();
        BytesWritable value = new BytesWritable();
        for (int i = 0; i < 3; i++) {
            int numSplits = random.nextInt(MAX_LENGTH / (SequenceFile.SYNC_INTERVAL / 20)) + 1;
            //LOG.info("splitting: requesting = " + numSplits);
            InputSplit[] splits = format.getSplits(job, numSplits);
            //LOG.info("splitting: got = " + splits.length);
            // check each split
            BitSet bits = new BitSet(length);
            for (int j = 0; j < splits.length; j++) {
                RecordReader<IntWritable, BytesWritable> reader = format.getRecordReader(splits[j], job, reporter);
                try {
                    int count = 0;
                    while (reader.next(key, value)) {
                        //if (bits.get(key.get())) {
                        //    LOG.info("splits[" + j + "]=" + splits[j] + " : " + key.get());
                        //    LOG.info("@" + reader.getPos());
                        //}
                        assertFalse("Key in multiple partitions.", bits.get(key.get()));
                        bits.set(key.get());
                        count++;
                    }
                    //LOG.info("splits[" + j + "]=" + splits[j] + " count=" + count);
                } finally {
                    reader.close();
                }
            }
            assertEquals("Some keys in no partition.", length, bits.cardinality());
        }
    }
}
Use of java.util.BitSet in project hadoop by apache.
The class TestTextInputFormat, method testFormat.
@Test(timeout = 500000)
public void testFormat() throws Exception {
    JobConf job = new JobConf(defaultConf);
    Path file = new Path(workDir, "test.txt");
    // A reporter that does nothing
    Reporter reporter = Reporter.NULL;
    int seed = new Random().nextInt();
    LOG.info("seed = " + seed);
    Random random = new Random(seed);
    localFs.delete(workDir, true);
    FileInputFormat.setInputPaths(job, workDir);
    // for a variety of lengths
    for (int length = 0; length < MAX_LENGTH; length += random.nextInt(MAX_LENGTH / 10) + 1) {
        LOG.debug("creating; entries = " + length);
        // create a file with length entries
        Writer writer = new OutputStreamWriter(localFs.create(file));
        try {
            for (int i = 0; i < length; i++) {
                writer.write(Integer.toString(i));
                writer.write("\n");
            }
        } finally {
            writer.close();
        }
        // try splitting the file in a variety of sizes
        TextInputFormat format = new TextInputFormat();
        format.configure(job);
        LongWritable key = new LongWritable();
        Text value = new Text();
        for (int i = 0; i < 3; i++) {
            int numSplits = random.nextInt(MAX_LENGTH / 20) + 1;
            LOG.debug("splitting: requesting = " + numSplits);
            InputSplit[] splits = format.getSplits(job, numSplits);
            LOG.debug("splitting: got = " + splits.length);
            if (length == 0) {
                assertEquals("Files of length 0 are not returned from FileInputFormat.getSplits().", 1, splits.length);
                assertEquals("Empty file length == 0", 0, splits[0].getLength());
            }
            // check each split
            BitSet bits = new BitSet(length);
            for (int j = 0; j < splits.length; j++) {
                LOG.debug("split[" + j + "]= " + splits[j]);
                RecordReader<LongWritable, Text> reader = format.getRecordReader(splits[j], job, reporter);
                try {
                    int count = 0;
                    while (reader.next(key, value)) {
                        int v = Integer.parseInt(value.toString());
                        LOG.debug("read " + v);
                        if (bits.get(v)) {
                            LOG.warn("conflict with " + v + " in split " + j + " at position " + reader.getPos());
                        }
                        assertFalse("Key in multiple partitions.", bits.get(v));
                        bits.set(v);
                        count++;
                    }
                    LOG.debug("splits[" + j + "]=" + splits[j] + " count=" + count);
                } finally {
                    reader.close();
                }
            }
            assertEquals("Some keys in no partition.", length, bits.cardinality());
        }
    }
}
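The three input-format tests above repeat the same BitSet protocol over record keys instead of file indices: allocate a BitSet sized to the expected key range, flag each key as it is read, fail if the bit is already set, and compare cardinality() with the expected count at the end. A hypothetical helper capturing that protocol (RecordCoverage is not a Hadoop class, just a sketch of the shared idiom):

import java.util.BitSet;

// hypothetical helper, not part of Hadoop: exactly-once bookkeeping for integer keys
public class RecordCoverage {
    private final BitSet seen;
    private final int expected;

    public RecordCoverage(int expected) {
        this.expected = expected;
        this.seen = new BitSet(expected);
    }

    /** Record one key; fails if some other split already produced it. */
    public void record(int key) {
        if (seen.get(key)) {
            throw new AssertionError("key " + key + " read from multiple splits");
        }
        seen.set(key);
    }

    /** Fails unless every key in [0, expected) was recorded exactly once. */
    public void verifyComplete() {
        if (seen.cardinality() != expected) {
            throw new AssertionError("only " + seen.cardinality() + " of " + expected + " keys were read");
        }
    }
}

In a test loop, record(key) would replace the assertFalse/set pair inside the reader loop, and verifyComplete() would replace the final cardinality assertion.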