Example 16 with BlockLocation

use of org.apache.hadoop.fs.BlockLocation in project hadoop by apache.

the class TestDistributedFileSystem method testLocatedFileStatusStorageIdsTypes.

@Test(timeout = 120000)
public void testLocatedFileStatusStorageIdsTypes() throws Exception {
    final Configuration conf = getTestConfiguration();
    final MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).numDataNodes(3).build();
    try {
        final DistributedFileSystem fs = cluster.getFileSystem();
        final Path testFile = new Path("/testListLocatedStatus");
        final int blockSize = 4096;
        final int numBlocks = 10;
        // Create a test file
        final int repl = 2;
        DFSTestUtil.createFile(fs, testFile, blockSize, numBlocks * blockSize, blockSize, (short) repl, 0xADDED);
        DFSTestUtil.waitForReplication(fs, testFile, (short) repl, 30000);
        // Get the listing
        RemoteIterator<LocatedFileStatus> it = fs.listLocatedStatus(testFile);
        assertTrue("Expected file to be present", it.hasNext());
        LocatedFileStatus stat = it.next();
        BlockLocation[] locs = stat.getBlockLocations();
        assertEquals("Unexpected number of locations", numBlocks, locs.length);
        Set<String> dnStorageIds = new HashSet<>();
        for (DataNode d : cluster.getDataNodes()) {
            try (FsDatasetSpi.FsVolumeReferences volumes = d.getFSDataset().getFsVolumeReferences()) {
                for (FsVolumeSpi vol : volumes) {
                    dnStorageIds.add(vol.getStorageID());
                }
            }
        }
        for (BlockLocation loc : locs) {
            String[] ids = loc.getStorageIds();
            // Run it through a set to deduplicate, since there should be no dupes
            Set<String> storageIds = new HashSet<>();
            Collections.addAll(storageIds, ids);
            assertEquals("Unexpected num storage ids", repl, storageIds.size());
            // Make sure these are all valid storage IDs
            assertTrue("Unknown storage IDs found!", dnStorageIds.containsAll(storageIds));
            // Check storage types are the default, since we didn't set any
            StorageType[] types = loc.getStorageTypes();
            assertEquals("Unexpected num storage types", repl, types.length);
            for (StorageType t : types) {
                assertEquals("Unexpected storage type", StorageType.DEFAULT, t);
            }
        }
    } finally {
        if (cluster != null) {
            cluster.shutdown();
        }
    }
}
Also used : Path(org.apache.hadoop.fs.Path) StorageType(org.apache.hadoop.fs.StorageType) Configuration(org.apache.hadoop.conf.Configuration) FsDatasetSpi(org.apache.hadoop.hdfs.server.datanode.fsdataset.FsDatasetSpi) LocatedFileStatus(org.apache.hadoop.fs.LocatedFileStatus) BlockLocation(org.apache.hadoop.fs.BlockLocation) DataNode(org.apache.hadoop.hdfs.server.datanode.DataNode) FsVolumeSpi(org.apache.hadoop.hdfs.server.datanode.fsdataset.FsVolumeSpi) HashSet(java.util.HashSet) Test(org.junit.Test)
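
The pattern in this example generalizes to any client that wants per-replica storage metadata. Below is a minimal sketch, assuming an already-initialized FileSystem pointing at an HDFS cluster recent enough to report storage IDs and types on BlockLocation; the class and method names are illustrative, not part of the Hadoop test suite.

import java.io.IOException;

import org.apache.hadoop.fs.BlockLocation;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocatedFileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.RemoteIterator;
import org.apache.hadoop.fs.StorageType;

public class BlockStorageInfo {
    // Print the storage ID and storage type reported for every replica of
    // every block of a file. The arrays are empty on clusters that do not
    // expose this information.
    static void printStorageInfo(FileSystem fs, Path file) throws IOException {
        RemoteIterator<LocatedFileStatus> it = fs.listLocatedStatus(file);
        while (it.hasNext()) {
            LocatedFileStatus stat = it.next();
            for (BlockLocation loc : stat.getBlockLocations()) {
                String[] ids = loc.getStorageIds();
                StorageType[] types = loc.getStorageTypes();
                for (int i = 0; i < ids.length; i++) {
                    System.out.println("block@" + loc.getOffset() + " len=" + loc.getLength()
                        + " replica " + i + " -> " + ids[i] + " (" + types[i] + ")");
                }
            }
        }
    }
}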

Example 17 with BlockLocation

use of org.apache.hadoop.fs.BlockLocation in project hadoop by apache.

the class TestDatanodeDeath method checkFile.

//
// verify that the data written are sane
// 
private static void checkFile(FileSystem fileSys, Path name, int repl, int numblocks, int filesize, long seed) throws IOException {
    boolean done = false;
    int attempt = 0;
    long len = fileSys.getFileStatus(name).getLen();
    assertTrue(name + " should be of size " + filesize + " but found to be of size " + len, len == filesize);
    // wait till all full blocks are confirmed by the datanodes.
    while (!done) {
        attempt++;
        try {
            Thread.sleep(1000);
        } catch (InterruptedException e) {
            // ignore the interrupt and retry the check
        }
        done = true;
        BlockLocation[] locations = fileSys.getFileBlockLocations(fileSys.getFileStatus(name), 0, filesize);
        if (locations.length < numblocks) {
            if (attempt > 100) {
                System.out.println("File " + name + " has only " + locations.length + " blocks, " + " but is expected to have " + numblocks + " blocks.");
            }
            done = false;
            continue;
        }
        for (int idx = 0; idx < locations.length; idx++) {
            if (locations[idx].getHosts().length < repl) {
                if (attempt > 100) {
                    System.out.println("File " + name + " has " + locations.length + " blocks: " + " The " + idx + " block has only " + locations[idx].getHosts().length + " replicas but is expected to have " + repl + " replicas.");
                }
                done = false;
                break;
            }
        }
    }
    FSDataInputStream stm = fileSys.open(name);
    final byte[] expected = AppendTestUtil.randomBytes(seed, filesize);
    // do a sanity check. Read the file
    byte[] actual = new byte[filesize];
    stm.readFully(0, actual);
    checkData(actual, 0, expected, "Read 1");
}
Also used : FSDataInputStream(org.apache.hadoop.fs.FSDataInputStream) BlockLocation(org.apache.hadoop.fs.BlockLocation)
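
The wait loop above can be distilled into a small reusable helper that polls getFileBlockLocations until every block reports the expected number of replica hosts. A hedged sketch; the helper name and timeout handling are illustrative, not part of the original test.

import java.io.IOException;

import org.apache.hadoop.fs.BlockLocation;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class ReplicationWait {
    // Poll getFileBlockLocations() until every block of the file reports at
    // least `repl` hosts, or give up after roughly `timeoutMs` milliseconds.
    static boolean waitForReplicaHosts(FileSystem fs, Path file, int repl, long timeoutMs)
            throws IOException, InterruptedException {
        long deadline = System.currentTimeMillis() + timeoutMs;
        while (System.currentTimeMillis() < deadline) {
            FileStatus stat = fs.getFileStatus(file);
            BlockLocation[] locs = fs.getFileBlockLocations(stat, 0, stat.getLen());
            boolean allReplicated = locs.length > 0;
            for (BlockLocation loc : locs) {
                if (loc.getHosts().length < repl) {
                    allReplicated = false;
                    break;
                }
            }
            if (allReplicated) {
                return true;
            }
            Thread.sleep(1000);
        }
        return false;
    }
}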

Example 18 with BlockLocation

use of org.apache.hadoop.fs.BlockLocation in project hadoop by apache.

the class ViewFsBaseTest method testGetBlockLocations.

// override for HDFS
@Test
public void testGetBlockLocations() throws IOException {
    Path targetFilePath = new Path(targetTestRoot, "data/largeFile");
    FileContextTestHelper.createFile(fcTarget, targetFilePath, 10, 1024);
    Path viewFilePath = new Path("/data/largeFile");
    checkFileStatus(fcView, viewFilePath.toString(), fileType.isFile);
    BlockLocation[] viewBL = fcView.getFileBlockLocations(viewFilePath, 0, 10240 + 100);
    Assert.assertEquals(SupportsBlocks ? 10 : 1, viewBL.length);
    BlockLocation[] targetBL = fcTarget.getFileBlockLocations(targetFilePath, 0, 10240 + 100);
    compareBLs(viewBL, targetBL);
    // Same test but now get it via the FileStatus Parameter
    fcView.getFileBlockLocations(viewFilePath, 0, 10240 + 100);
    targetBL = fcTarget.getFileBlockLocations(targetFilePath, 0, 10240 + 100);
    compareBLs(viewBL, targetBL);
}
Also used : Path(org.apache.hadoop.fs.Path) BlockLocation(org.apache.hadoop.fs.BlockLocation) Test(org.junit.Test)
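
compareBLs is a helper defined elsewhere in ViewFsBaseTest. The sketch below is an assumption about what such a comparison typically checks (offset, length and host list per block), not the actual implementation.

import java.io.IOException;
import java.util.Arrays;

import org.apache.hadoop.fs.BlockLocation;

public class BlockLocationCompare {
    // Field-by-field comparison of two BlockLocation arrays; throws on mismatch.
    // Illustrative only - the real compareBLs in ViewFsBaseTest may differ.
    static void assertSameLocations(BlockLocation[] a, BlockLocation[] b) throws IOException {
        if (a.length != b.length) {
            throw new AssertionError("Different number of block locations");
        }
        for (int i = 0; i < a.length; i++) {
            if (a[i].getOffset() != b[i].getOffset()
                    || a[i].getLength() != b[i].getLength()
                    || !Arrays.equals(a[i].getHosts(), b[i].getHosts())) {
                throw new AssertionError("Block location mismatch at index " + i);
            }
        }
    }
}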

Example 19 with BlockLocation

use of org.apache.hadoop.fs.BlockLocation in project hadoop by apache.

the class TestSwiftFileSystemPartitionedUploads method testFilePartUploadNoLengthCheck.

/**
   * Tests partitioned upload functionality for big files (> 5 GB).
   */
@Test(timeout = SWIFT_BULK_IO_TEST_TIMEOUT)
public void testFilePartUploadNoLengthCheck() throws IOException, URISyntaxException {
    final Path path = new Path("/test/testFilePartUploadLengthCheck");
    int len = 8192;
    final byte[] src = SwiftTestUtils.dataset(len, 32, 144);
    FSDataOutputStream out = fs.create(path, false, getBufferSize(), (short) 1, BLOCK_SIZE);
    try {
        int totalPartitionsToWrite = len / PART_SIZE_BYTES;
        assertPartitionsWritten("Startup", out, 0);
        //write 2048
        int firstWriteLen = 2048;
        out.write(src, 0, firstWriteLen);
        //assert
        long expected = getExpectedPartitionsWritten(firstWriteLen, PART_SIZE_BYTES, false);
        SwiftUtils.debug(LOG, "First write: predict %d partitions written", expected);
        assertPartitionsWritten("First write completed", out, expected);
        //write the rest
        int remainder = len - firstWriteLen;
        SwiftUtils.debug(LOG, "remainder: writing: %d bytes", remainder);
        out.write(src, firstWriteLen, remainder);
        expected = getExpectedPartitionsWritten(len, PART_SIZE_BYTES, false);
        assertPartitionsWritten("Remaining data", out, expected);
        out.close();
        expected = getExpectedPartitionsWritten(len, PART_SIZE_BYTES, true);
        assertPartitionsWritten("Stream closed", out, expected);
        Header[] headers = fs.getStore().getObjectHeaders(path, true);
        for (Header header : headers) {
            LOG.info(header.toString());
        }
        byte[] dest = readDataset(fs, path, len);
        LOG.info("Read dataset from " + path + ": data length =" + len);
        //compare data
        SwiftTestUtils.compareByteArrays(src, dest, len);
        FileStatus status = fs.getFileStatus(path);
        //now see what block location info comes back.
        //This will vary depending on the Swift version, so the results
        //aren't checked -merely that the test actually worked
        BlockLocation[] locations = fs.getFileBlockLocations(status, 0, len);
        assertNotNull("Null getFileBlockLocations()", locations);
        assertTrue("empty array returned for getFileBlockLocations()", locations.length > 0);
    } finally {
        IOUtils.closeStream(out);
    }
}
Also used : Path(org.apache.hadoop.fs.Path) FileStatus(org.apache.hadoop.fs.FileStatus) Header(org.apache.commons.httpclient.Header) FSDataOutputStream(org.apache.hadoop.fs.FSDataOutputStream) BlockLocation(org.apache.hadoop.fs.BlockLocation) Test(org.junit.Test)
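
For an object store such as Swift the block-location data is synthesized by the connector, which is why the test only asserts that a non-empty array comes back. A hedged sketch that dumps whatever locations a generic FileSystem reports; the class and method names are placeholders.

import java.io.IOException;

import org.apache.hadoop.fs.BlockLocation;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class ObjectStoreLocations {
    // Log offset, length and hosts for each BlockLocation of a file.
    // Object-store connectors often return a single synthetic location, so
    // callers should not assume HDFS-style per-block granularity.
    static void dumpLocations(FileSystem fs, Path path) throws IOException {
        FileStatus status = fs.getFileStatus(path);
        BlockLocation[] locations = fs.getFileBlockLocations(status, 0, status.getLen());
        for (BlockLocation loc : locations) {
            System.out.println("offset=" + loc.getOffset()
                + " length=" + loc.getLength()
                + " hosts=" + String.join(",", loc.getHosts()));
        }
    }
}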

Example 20 with BlockLocation

use of org.apache.hadoop.fs.BlockLocation in project hadoop by apache.

the class TestSwiftFileSystemPartitionedUploads method testFilePartUpload.

/**
   * Tests partitioned upload functionality for big files (> 5 GB).
   */
@Test(timeout = SWIFT_BULK_IO_TEST_TIMEOUT)
public void testFilePartUpload() throws Throwable {
    final Path path = new Path("/test/testFilePartUpload");
    int len = 8192;
    final byte[] src = SwiftTestUtils.dataset(len, 32, 144);
    FSDataOutputStream out = fs.create(path, false, getBufferSize(), (short) 1, BLOCK_SIZE);
    try {
        int totalPartitionsToWrite = len / PART_SIZE_BYTES;
        assertPartitionsWritten("Startup", out, 0);
        //write 2048
        int firstWriteLen = 2048;
        out.write(src, 0, firstWriteLen);
        //assert
        long expected = getExpectedPartitionsWritten(firstWriteLen, PART_SIZE_BYTES, false);
        SwiftUtils.debug(LOG, "First write: predict %d partitions written", expected);
        assertPartitionsWritten("First write completed", out, expected);
        //write the rest
        int remainder = len - firstWriteLen;
        SwiftUtils.debug(LOG, "remainder: writing: %d bytes", remainder);
        out.write(src, firstWriteLen, remainder);
        expected = getExpectedPartitionsWritten(len, PART_SIZE_BYTES, false);
        assertPartitionsWritten("Remaining data", out, expected);
        out.close();
        expected = getExpectedPartitionsWritten(len, PART_SIZE_BYTES, true);
        assertPartitionsWritten("Stream closed", out, expected);
        Header[] headers = fs.getStore().getObjectHeaders(path, true);
        for (Header header : headers) {
            LOG.info(header.toString());
        }
        byte[] dest = readDataset(fs, path, len);
        LOG.info("Read dataset from " + path + ": data length =" + len);
        //compare data
        SwiftTestUtils.compareByteArrays(src, dest, len);
        FileStatus status;
        final Path qualifiedPath = path.makeQualified(fs);
        status = fs.getFileStatus(qualifiedPath);
        //now see what block location info comes back.
        //This will vary depending on the Swift version, so the results
        //aren't checked -merely that the test actually worked
        BlockLocation[] locations = fs.getFileBlockLocations(status, 0, len);
        assertNotNull("Null getFileBlockLocations()", locations);
        assertTrue("empty array returned for getFileBlockLocations()", locations.length > 0);
        //validate the path length; an assertion failure is downgraded to a skip below
        try {
            validatePathLen(path, len);
        } catch (AssertionError e) {
            //downgrade to a skip
            throw new AssumptionViolatedException(e, null);
        }
    } finally {
        IOUtils.closeStream(out);
    }
}
Also used : Path(org.apache.hadoop.fs.Path) FileStatus(org.apache.hadoop.fs.FileStatus) Header(org.apache.commons.httpclient.Header) AssumptionViolatedException(org.junit.internal.AssumptionViolatedException) FSDataOutputStream(org.apache.hadoop.fs.FSDataOutputStream) BlockLocation(org.apache.hadoop.fs.BlockLocation) Test(org.junit.Test)
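
The notable differences from the previous example are the qualified-path lookup and the final step that converts a length-validation failure into a skipped test instead of a hard failure. A small sketch of that downgrade pattern; the wrapper method and Runnable parameter are assumptions, not Hadoop code.

import org.junit.internal.AssumptionViolatedException;

public class DowngradeToSkip {
    // Run a validation step, but rethrow an AssertionError as an
    // AssumptionViolatedException so JUnit runners typically report the
    // test as skipped rather than failed.
    static void validateOrSkip(Runnable validation) {
        try {
            validation.run();
        } catch (AssertionError e) {
            throw new AssumptionViolatedException(e, null);
        }
    }
}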

Aggregations

BlockLocation (org.apache.hadoop.fs.BlockLocation)87 Path (org.apache.hadoop.fs.Path)41 FileStatus (org.apache.hadoop.fs.FileStatus)30 Test (org.junit.Test)29 FileSystem (org.apache.hadoop.fs.FileSystem)16 Configuration (org.apache.hadoop.conf.Configuration)14 ArrayList (java.util.ArrayList)13 IOException (java.io.IOException)10 FSDataOutputStream (org.apache.hadoop.fs.FSDataOutputStream)10 DatanodeInfo (org.apache.hadoop.hdfs.protocol.DatanodeInfo)7 DataNode (org.apache.hadoop.hdfs.server.datanode.DataNode)7 InetSocketAddress (java.net.InetSocketAddress)5 FSDataInputStream (org.apache.hadoop.fs.FSDataInputStream)5 LocatedFileStatus (org.apache.hadoop.fs.LocatedFileStatus)5 HdfsConfiguration (org.apache.hadoop.hdfs.HdfsConfiguration)5 LocatedBlocks (org.apache.hadoop.hdfs.protocol.LocatedBlocks)5 IgfsBlockLocation (org.apache.ignite.igfs.IgfsBlockLocation)5 IgfsPath (org.apache.ignite.igfs.IgfsPath)5 HashMap (java.util.HashMap)4 Random (java.util.Random)4