Example 11 with BitSet

Use of java.util.BitSet in project hadoop by apache.

From the class TestReconstructStripedBlocks, method testCountLiveReplicas.

/**
   * Make sure the NN can detect the scenario where there is a sufficient
   * number of internal blocks (>= 9 by default) but a data/parity block is
   * still missing.
   */
@Test
public void testCountLiveReplicas() throws Exception {
    final HdfsConfiguration conf = new HdfsConfiguration();
    conf.setInt(DFSConfigKeys.DFS_NAMENODE_REDUNDANCY_INTERVAL_SECONDS_KEY, 1);
    conf.setBoolean(DFSConfigKeys.DFS_NAMENODE_REDUNDANCY_CONSIDERLOAD_KEY, false);
    conf.set(DFSConfigKeys.DFS_NAMENODE_EC_POLICIES_ENABLED_KEY, StripedFileTestUtil.getDefaultECPolicy().getName());
    cluster = new MiniDFSCluster.Builder(conf).numDataNodes(groupSize + 2).build();
    cluster.waitActive();
    DistributedFileSystem fs = cluster.getFileSystem();
    try {
        fs.mkdirs(dirPath);
        fs.setErasureCodingPolicy(dirPath, StripedFileTestUtil.getDefaultECPolicy().getName());
        DFSTestUtil.createFile(fs, filePath, cellSize * dataBlocks * 2, (short) 1, 0L);
        // stop a dn
        LocatedBlocks blks = fs.getClient().getLocatedBlocks(filePath.toString(), 0);
        LocatedStripedBlock block = (LocatedStripedBlock) blks.getLastLocatedBlock();
        DatanodeInfo dnToStop = block.getLocations()[0];
        MiniDFSCluster.DataNodeProperties dnProp = cluster.stopDataNode(dnToStop.getXferAddr());
        cluster.setDataNodeDead(dnToStop);
        // wait for reconstruction to happen
        DFSTestUtil.waitForReplication(fs, filePath, groupSize, 15 * 1000);
        // bring the dn back: 10 internal blocks now
        cluster.restartDataNode(dnProp);
        cluster.waitActive();
        // stop another dn: 9 internal blocks remain, but they cover only 8 distinct real blocks
        dnToStop = block.getLocations()[1];
        cluster.stopDataNode(dnToStop.getXferAddr());
        cluster.setDataNodeDead(dnToStop);
        // the namenode currently tracks the missing block; restart the NN so it must re-detect it
        cluster.restartNameNode(true);
        for (DataNode dn : cluster.getDataNodes()) {
            DataNodeTestUtils.triggerBlockReport(dn);
        }
        FSNamesystem fsn = cluster.getNamesystem();
        BlockManager bm = fsn.getBlockManager();
        // wait for 3 cycles of the redundancy monitor
        Thread.sleep(3000);
        for (DataNode dn : cluster.getDataNodes()) {
            DataNodeTestUtils.triggerHeartbeat(dn);
        }
        // check if NN can detect the missing internal block and finish the
        // reconstruction
        StripedFileTestUtil.waitForReconstructionFinished(filePath, fs, groupSize);
        boolean reconstructed = false;
        for (int i = 0; i < 5; i++) {
            NumberReplicas num = null;
            fsn.readLock();
            try {
                BlockInfo blockInfo = cluster.getNamesystem().getFSDirectory().getINode4Write(filePath.toString()).asFile().getLastBlock();
                num = bm.countNodes(blockInfo);
            } finally {
                fsn.readUnlock();
            }
            if (num.liveReplicas() >= groupSize) {
                reconstructed = true;
                break;
            } else {
                Thread.sleep(1000);
            }
        }
        Assert.assertTrue(reconstructed);
        blks = fs.getClient().getLocatedBlocks(filePath.toString(), 0);
        block = (LocatedStripedBlock) blks.getLastLocatedBlock();
        BitSet bitSet = new BitSet(groupSize);
        for (byte index : block.getBlockIndices()) {
            bitSet.set(index);
        }
        for (int i = 0; i < groupSize; i++) {
            Assert.assertTrue(bitSet.get(i));
        }
    } finally {
        cluster.shutdown();
    }
}
Also used : DatanodeInfo(org.apache.hadoop.hdfs.protocol.DatanodeInfo) MiniDFSCluster(org.apache.hadoop.hdfs.MiniDFSCluster) LocatedBlocks(org.apache.hadoop.hdfs.protocol.LocatedBlocks) BitSet(java.util.BitSet) NumberReplicas(org.apache.hadoop.hdfs.server.blockmanagement.NumberReplicas) HdfsConfiguration(org.apache.hadoop.hdfs.HdfsConfiguration) DistributedFileSystem(org.apache.hadoop.hdfs.DistributedFileSystem) LocatedStripedBlock(org.apache.hadoop.hdfs.protocol.LocatedStripedBlock) DataNode(org.apache.hadoop.hdfs.server.datanode.DataNode) BlockManager(org.apache.hadoop.hdfs.server.blockmanagement.BlockManager) BlockInfo(org.apache.hadoop.hdfs.server.blockmanagement.BlockInfo) Test(org.junit.Test)
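
The closing loop above is the core BitSet idiom in this example: set one bit per reported block index, then require every index in [0, groupSize) to be set. A minimal standalone sketch of that check, assuming a plain byte[] of indices and a made-up helper name (nothing below comes from the Hadoop test itself):

import java.util.BitSet;

public class BlockIndexCoverage {

    /** Returns -1 if all indices 0..groupSize-1 are present, otherwise the first missing index. */
    static int firstMissingIndex(byte[] blockIndices, int groupSize) {
        BitSet seen = new BitSet(groupSize);
        for (byte index : blockIndices) {
            seen.set(index);
        }
        // nextClearBit scans from 0 for the first unset bit, i.e. the first missing internal block
        int missing = seen.nextClearBit(0);
        return missing < groupSize ? missing : -1;
    }

    public static void main(String[] args) {
        // an RS-6-3 block group has indices 0..8; index 4 is deliberately absent here
        byte[] indices = { 0, 1, 2, 3, 5, 6, 7, 8 };
        System.out.println(firstMissingIndex(indices, 9)); // prints 4
    }
}

Compared with the test's assert-per-index loop, BitSet.nextClearBit has the small advantage of reporting which internal block is missing when the check fails.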

Example 12 with BitSet

Use of java.util.BitSet in project hadoop by apache.

From the class TestMultiFileInputFormat, method testFormat.

@Test
public void testFormat() throws IOException {
    LOG.info("Test started");
    LOG.info("Max split count           = " + MAX_SPLIT_COUNT);
    LOG.info("Split count increment     = " + SPLIT_COUNT_INCR);
    LOG.info("Max bytes per file        = " + MAX_BYTES);
    LOG.info("Max number of files       = " + MAX_NUM_FILES);
    LOG.info("Number of files increment = " + NUM_FILES_INCR);
    MultiFileInputFormat<Text, Text> format = new DummyMultiFileInputFormat();
    FileSystem fs = FileSystem.getLocal(job);
    for (int numFiles = 1; numFiles < MAX_NUM_FILES; numFiles += (NUM_FILES_INCR / 2) + rand.nextInt(NUM_FILES_INCR / 2)) {
        Path dir = initFiles(fs, numFiles, -1);
        BitSet bits = new BitSet(numFiles);
        for (int i = 1; i < MAX_SPLIT_COUNT; i += rand.nextInt(SPLIT_COUNT_INCR) + 1) {
            LOG.info("Running for Num Files=" + numFiles + ", split count=" + i);
            MultiFileSplit[] splits = (MultiFileSplit[]) format.getSplits(job, i);
            bits.clear();
            for (MultiFileSplit split : splits) {
                long splitLength = 0;
                for (Path p : split.getPaths()) {
                    long length = fs.getContentSummary(p).getLength();
                    assertEquals(length, lengths.get(p.getName()).longValue());
                    splitLength += length;
                    String name = p.getName();
                    int index = Integer.parseInt(name.substring(name.lastIndexOf("file_") + 5));
                    assertFalse(bits.get(index));
                    bits.set(index);
                }
                assertEquals(splitLength, split.getLength());
            }
        }
        // expected value first: every file index must land in exactly one split
        assertEquals(numFiles, bits.cardinality());
        fs.delete(dir, true);
    }
    LOG.info("Test Finished");
}
Also used : Path(org.apache.hadoop.fs.Path) FileSystem(org.apache.hadoop.fs.FileSystem) BitSet(java.util.BitSet) Text(org.apache.hadoop.io.Text) Test(org.junit.Test)
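
This test states the same invariant twice, from both sides: assertFalse(bits.get(index)) before bits.set(index) rejects a file that appears in two splits, and the final cardinality comparison rejects a file that appears in none. A small self-contained restatement of that two-sided check, with invented class and method names for illustration:

import java.util.BitSet;

public class ExactlyOnceChecker {

    private final BitSet seen;
    private final int expected;

    ExactlyOnceChecker(int expected) {
        this.expected = expected;
        this.seen = new BitSet(expected);
    }

    /** Records one index; fails if the same index was already recorded by another partition. */
    void record(int index) {
        if (seen.get(index)) {
            throw new AssertionError("index " + index + " appeared in multiple partitions");
        }
        seen.set(index);
    }

    /** Fails unless every index in 0..expected-1 was recorded. */
    void verifyComplete() {
        if (seen.cardinality() != expected) {
            throw new AssertionError("saw " + seen.cardinality() + " of " + expected + " indices");
        }
    }
}

Note how cardinality() turns the completeness check into a single call at the end rather than a second loop over all indices.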

Example 13 with BitSet

Use of java.util.BitSet in project hadoop by apache.

From the class TestSequenceFileAsTextInputFormat, method testFormat.

@Test
public void testFormat() throws Exception {
    JobConf job = new JobConf(conf);
    FileSystem fs = FileSystem.getLocal(conf);
    Path dir = new Path(System.getProperty("test.build.data", ".") + "/mapred");
    Path file = new Path(dir, "test.seq");
    Reporter reporter = Reporter.NULL;
    int seed = new Random().nextInt();
    //LOG.info("seed = "+seed);
    Random random = new Random(seed);
    fs.delete(dir, true);
    FileInputFormat.setInputPaths(job, dir);
    // for a variety of lengths
    for (int length = 0; length < MAX_LENGTH; length += random.nextInt(MAX_LENGTH / 10) + 1) {
        //LOG.info("creating; entries = " + length);
        // create a file with length entries
        SequenceFile.Writer writer = SequenceFile.createWriter(fs, conf, file, IntWritable.class, LongWritable.class);
        try {
            for (int i = 0; i < length; i++) {
                IntWritable key = new IntWritable(i);
                LongWritable value = new LongWritable(10 * i);
                writer.append(key, value);
            }
        } finally {
            writer.close();
        }
        // try splitting the file in a variety of sizes
        InputFormat<Text, Text> format = new SequenceFileAsTextInputFormat();
        for (int i = 0; i < 3; i++) {
            int numSplits = random.nextInt(MAX_LENGTH / (SequenceFile.SYNC_INTERVAL / 20)) + 1;
            //LOG.info("splitting: requesting = " + numSplits);
            InputSplit[] splits = format.getSplits(job, numSplits);
            //LOG.info("splitting: got =        " + splits.length);
            // check each split
            BitSet bits = new BitSet(length);
            for (int j = 0; j < splits.length; j++) {
                RecordReader<Text, Text> reader = format.getRecordReader(splits[j], job, reporter);
                Class<?> readerClass = reader.getClass();
                assertEquals("reader class is SequenceFileAsTextRecordReader.", SequenceFileAsTextRecordReader.class, readerClass);
                Text value = reader.createValue();
                Text key = reader.createKey();
                try {
                    int count = 0;
                    while (reader.next(key, value)) {
                        // if (bits.get(key.get())) {
                        // LOG.info("splits["+j+"]="+splits[j]+" : " + key.get());
                        // LOG.info("@"+reader.getPos());
                        // }
                        int keyInt = Integer.parseInt(key.toString());
                        assertFalse("Key in multiple partitions.", bits.get(keyInt));
                        bits.set(keyInt);
                        count++;
                    }
                //LOG.info("splits["+j+"]="+splits[j]+" count=" + count);
                } finally {
                    reader.close();
                }
            }
            assertEquals("Some keys in no partition.", length, bits.cardinality());
        }
    }
}
Also used : Path(org.apache.hadoop.fs.Path) BitSet(java.util.BitSet) Text(org.apache.hadoop.io.Text) Random(java.util.Random) SequenceFile(org.apache.hadoop.io.SequenceFile) FileSystem(org.apache.hadoop.fs.FileSystem) LongWritable(org.apache.hadoop.io.LongWritable) IntWritable(org.apache.hadoop.io.IntWritable) Test(org.junit.Test)
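
One detail specific to this variant: SequenceFileAsTextInputFormat hands both key and value back as Text (the record reader converts the underlying Writables through their string form, as far as the API contract goes), so the test must parse the key back to an int before it can index the BitSet. A minimal sketch of that round trip, outside the MapReduce machinery:

import java.util.BitSet;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;

public class TextKeyRoundTrip {

    public static void main(String[] args) {
        BitSet bits = new BitSet(100);
        IntWritable original = new IntWritable(42);
        // stands in for the Text conversion done by the SequenceFileAsText record reader
        Text asText = new Text(original.toString());
        // mirrors Integer.parseInt(key.toString()) in the test above
        int keyInt = Integer.parseInt(asText.toString());
        bits.set(keyInt);
        System.out.println(bits.get(42)); // true
    }
}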

Example 14 with BitSet

Use of java.util.BitSet in project hadoop by apache.

From the class TestSequenceFileInputFormat, method testFormat.

@Test
public void testFormat() throws Exception {
    JobConf job = new JobConf(conf);
    FileSystem fs = FileSystem.getLocal(conf);
    Path dir = new Path(System.getProperty("test.build.data", ".") + "/mapred");
    Path file = new Path(dir, "test.seq");
    Reporter reporter = Reporter.NULL;
    int seed = new Random().nextInt();
    //LOG.info("seed = "+seed);
    Random random = new Random(seed);
    fs.delete(dir, true);
    FileInputFormat.setInputPaths(job, dir);
    // for a variety of lengths
    for (int length = 0; length < MAX_LENGTH; length += random.nextInt(MAX_LENGTH / 10) + 1) {
        //LOG.info("creating; entries = " + length);
        // create a file with length entries
        SequenceFile.Writer writer = SequenceFile.createWriter(fs, conf, file, IntWritable.class, BytesWritable.class);
        try {
            for (int i = 0; i < length; i++) {
                IntWritable key = new IntWritable(i);
                byte[] data = new byte[random.nextInt(10)];
                random.nextBytes(data);
                BytesWritable value = new BytesWritable(data);
                writer.append(key, value);
            }
        } finally {
            writer.close();
        }
        // try splitting the file in a variety of sizes
        InputFormat<IntWritable, BytesWritable> format = new SequenceFileInputFormat<IntWritable, BytesWritable>();
        IntWritable key = new IntWritable();
        BytesWritable value = new BytesWritable();
        for (int i = 0; i < 3; i++) {
            int numSplits = random.nextInt(MAX_LENGTH / (SequenceFile.SYNC_INTERVAL / 20)) + 1;
            //LOG.info("splitting: requesting = " + numSplits);
            InputSplit[] splits = format.getSplits(job, numSplits);
            //LOG.info("splitting: got =        " + splits.length);
            // check each split
            BitSet bits = new BitSet(length);
            for (int j = 0; j < splits.length; j++) {
                RecordReader<IntWritable, BytesWritable> reader = format.getRecordReader(splits[j], job, reporter);
                try {
                    int count = 0;
                    while (reader.next(key, value)) {
                        // if (bits.get(key.get())) {
                        // LOG.info("splits["+j+"]="+splits[j]+" : " + key.get());
                        // LOG.info("@"+reader.getPos());
                        // }
                        assertFalse("Key in multiple partitions.", bits.get(key.get()));
                        bits.set(key.get());
                        count++;
                    }
                //LOG.info("splits["+j+"]="+splits[j]+" count=" + count);
                } finally {
                    reader.close();
                }
            }
            assertEquals("Some keys in no partition.", length, bits.cardinality());
        }
    }
}
Also used : Path(org.apache.hadoop.fs.Path) BitSet(java.util.BitSet) BytesWritable(org.apache.hadoop.io.BytesWritable) Random(java.util.Random) SequenceFile(org.apache.hadoop.io.SequenceFile) FileSystem(org.apache.hadoop.fs.FileSystem) IntWritable(org.apache.hadoop.io.IntWritable) Test(org.junit.Test)
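
Unlike Example 13, this test allocates key and value once, outside the loop, and lets reader.next(key, value) overwrite them on every call, which is the old-API record-reader contract. The BitSet is what makes that reuse safe for the bookkeeping: bits.set(key.get()) copies the primitive out before the next call mutates the Writable. A short sketch of the aliasing hazard this avoids (the list-based variant is a deliberately wrong counterexample):

import java.util.ArrayList;
import java.util.BitSet;
import java.util.List;
import org.apache.hadoop.io.IntWritable;

public class WritableReuse {

    public static void main(String[] args) {
        IntWritable reused = new IntWritable();
        List<IntWritable> wrong = new ArrayList<>(); // stores references to one mutated object
        BitSet right = new BitSet();                 // stores the primitive value itself
        for (int i = 0; i < 3; i++) {
            reused.set(i);      // stands in for reader.next(key, value)
            wrong.add(reused);  // every element aliases the same instance
            right.set(reused.get());
        }
        System.out.println(wrong);               // [2, 2, 2]: all entries show the final value
        System.out.println(right.cardinality()); // 3: each distinct value was recorded
    }
}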

Example 15 with BitSet

Use of java.util.BitSet in project hadoop by apache.

From the class TestTextInputFormat, method testFormat.

@Test(timeout = 500000)
public void testFormat() throws Exception {
    JobConf job = new JobConf(defaultConf);
    Path file = new Path(workDir, "test.txt");
    // A reporter that does nothing
    Reporter reporter = Reporter.NULL;
    int seed = new Random().nextInt();
    LOG.info("seed = " + seed);
    Random random = new Random(seed);
    localFs.delete(workDir, true);
    FileInputFormat.setInputPaths(job, workDir);
    // for a variety of lengths
    for (int length = 0; length < MAX_LENGTH; length += random.nextInt(MAX_LENGTH / 10) + 1) {
        LOG.debug("creating; entries = " + length);
        // create a file with length entries
        Writer writer = new OutputStreamWriter(localFs.create(file));
        try {
            for (int i = 0; i < length; i++) {
                writer.write(Integer.toString(i));
                writer.write("\n");
            }
        } finally {
            writer.close();
        }
        // try splitting the file in a variety of sizes
        TextInputFormat format = new TextInputFormat();
        format.configure(job);
        LongWritable key = new LongWritable();
        Text value = new Text();
        for (int i = 0; i < 3; i++) {
            int numSplits = random.nextInt(MAX_LENGTH / 20) + 1;
            LOG.debug("splitting: requesting = " + numSplits);
            InputSplit[] splits = format.getSplits(job, numSplits);
            LOG.debug("splitting: got =        " + splits.length);
            if (length == 0) {
                assertEquals("Files of length 0 are not returned from FileInputFormat.getSplits().", 1, splits.length);
                assertEquals("Empty file length == 0", 0, splits[0].getLength());
            }
            // check each split
            BitSet bits = new BitSet(length);
            for (int j = 0; j < splits.length; j++) {
                LOG.debug("split[" + j + "]= " + splits[j]);
                RecordReader<LongWritable, Text> reader = format.getRecordReader(splits[j], job, reporter);
                try {
                    int count = 0;
                    while (reader.next(key, value)) {
                        int v = Integer.parseInt(value.toString());
                        LOG.debug("read " + v);
                        if (bits.get(v)) {
                            LOG.warn("conflict with " + v + " in split " + j + " at position " + reader.getPos());
                        }
                        assertFalse("Key in multiple partitions.", bits.get(v));
                        bits.set(v);
                        count++;
                    }
                    LOG.debug("splits[" + j + "]=" + splits[j] + " count=" + count);
                } finally {
                    reader.close();
                }
            }
            assertEquals("Some keys in no partition.", length, bits.cardinality());
        }
    }
}
Also used : Path(org.apache.hadoop.fs.Path) BitSet(java.util.BitSet) Text(org.apache.hadoop.io.Text) Random(java.util.Random) OutputStreamWriter(java.io.OutputStreamWriter) LongWritable(org.apache.hadoop.io.LongWritable) Writer(java.io.Writer) OutputStreamWriter(java.io.OutputStreamWriter) Test(org.junit.Test)
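
A small practice worth copying from this example: unlike Examples 13 and 14, which leave the seed log line commented out, this test logs the random seed before using it, so a failing randomized run can be replayed exactly. A hedged sketch of that pattern; the test.seed property name here is invented for illustration:

import java.util.Random;

public class ReproducibleRandomness {

    public static void main(String[] args) {
        // replay a failed run by pinning the seed, e.g. -Dtest.seed=12345 (hypothetical property)
        String pinned = System.getProperty("test.seed");
        int seed = (pinned != null) ? Integer.parseInt(pinned) : new Random().nextInt();
        System.out.println("seed = " + seed); // plays the role of LOG.info in the test above
        Random random = new Random(seed);
        // drive the randomized lengths and split counts from this one Random, as the tests do
        System.out.println("first draw: " + random.nextInt(100));
    }
}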

Aggregations

BitSet (java.util.BitSet): 754 usages
ArrayList (java.util.ArrayList): 82
Test (org.junit.Test): 54
List (java.util.List): 29
HashMap (java.util.HashMap): 27
Map (java.util.Map): 26
HashSet (java.util.HashSet): 24
RegisterSpecList (com.android.dx.rop.code.RegisterSpecList): 22
RegisterSpecList (com.taobao.android.dx.rop.code.RegisterSpecList): 22
Random (java.util.Random): 21
Cursor (android.database.Cursor): 20
MatrixCursor (android.database.MatrixCursor): 20
MergeCursor (android.database.MergeCursor): 20
DirectoryResult (com.android.documentsui.DirectoryResult): 20
LinkedList (java.util.LinkedList): 16
IOException (java.io.IOException): 14
Iterator (java.util.Iterator): 13
BlockNode (jadx.core.dex.nodes.BlockNode): 12
BitSetGroup (org.apache.carbondata.core.util.BitSetGroup): 12
Directory (org.apache.lucene.store.Directory): 12