
Example 76 with BlockLocation

Use of org.apache.hadoop.fs.BlockLocation in project hadoop by apache.

From the class TestFileAppend, the method testFailedAppendBlockRejection.

/**
   * Old replica of the block should not be accepted as valid for append/read
   */
@Test
public void testFailedAppendBlockRejection() throws Exception {
    Configuration conf = new HdfsConfiguration();
    conf.set("dfs.client.block.write.replace-datanode-on-failure.enable", "false");
    MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).numDataNodes(3).build();
    DistributedFileSystem fs = null;
    try {
        fs = cluster.getFileSystem();
        Path path = new Path("/test");
        FSDataOutputStream out = fs.create(path);
        out.writeBytes("hello\n");
        out.close();
        // stop one datanode
        DataNodeProperties dnProp = cluster.stopDataNode(0);
        String dnAddress = dnProp.datanode.getXferAddress().toString();
        if (dnAddress.startsWith("/")) {
            dnAddress = dnAddress.substring(1);
        }
        // append again to bump genstamps
        for (int i = 0; i < 2; i++) {
            out = fs.append(path);
            out.writeBytes("helloagain\n");
            out.close();
        }
        // re-open and make the block state as underconstruction
        out = fs.append(path);
        cluster.restartDataNode(dnProp, true);
        // wait till the block report comes
        Thread.sleep(2000);
        // check the block locations, this should not contain restarted datanode
        BlockLocation[] locations = fs.getFileBlockLocations(path, 0, Long.MAX_VALUE);
        String[] names = locations[0].getNames();
        for (String node : names) {
            if (node.equals(dnAddress)) {
                fail("Failed append should not be present in latest block locations.");
            }
        }
        out.close();
    } finally {
        IOUtils.closeStream(fs);
        cluster.shutdown();
    }
}
Also used : Path(org.apache.hadoop.fs.Path) Configuration(org.apache.hadoop.conf.Configuration) DataNodeProperties(org.apache.hadoop.hdfs.MiniDFSCluster.DataNodeProperties) BlockLocation(org.apache.hadoop.fs.BlockLocation) FSDataOutputStream(org.apache.hadoop.fs.FSDataOutputStream) Test(org.junit.Test)
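
As a quick reference for the API these examples revolve around, the array returned by getFileBlockLocations can be inspected directly. A minimal standalone sketch, assuming a reachable HDFS (or local) file system and a hypothetical /test path:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.BlockLocation;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class BlockLocationInspector {
    public static void main(String[] args) throws Exception {
        // Hypothetical path; assumes fs.defaultFS points at the file system you want to query
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);
        Path path = new Path("/test");
        // Ask for the locations of every block in the file
        BlockLocation[] locations = fs.getFileBlockLocations(path, 0, Long.MAX_VALUE);
        for (BlockLocation location : locations) {
            // getNames() returns host:port transfer addresses, getHosts() returns hostnames only
            System.out.println("offset=" + location.getOffset()
                    + " length=" + location.getLength()
                    + " names=" + String.join(",", location.getNames())
                    + " hosts=" + String.join(",", location.getHosts()));
        }
        fs.close();
    }
}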

Example 77 with BlockLocation

Use of org.apache.hadoop.fs.BlockLocation in project drill by apache.

From the class TestAffinityCalculator, the method testBuildRangeMap.

//  @Test
//  public void testSetEndpointBytes(@Injectable final FileSystem fs, @Injectable final FileStatus file) throws Throwable{
//    final long blockSize = 256*1024*1024;
//    LinkedList<ParquetGroupScan.RowGroupInfo> rowGroups = new LinkedList<>();
//    int numberOfHosts = 4;
//    int numberOfBlocks = 3;
//    String port = "1234";
//    String[] hosts = new String[numberOfHosts];
//
//    final BlockLocation[] blockLocations = buildBlockLocations(hosts, blockSize);
//    final LinkedList<CoordinationProtos.DrillbitEndpoint> endPoints = buildEndpoints(numberOfHosts);
//    buildRowGroups(rowGroups, numberOfBlocks, blockSize, 3);
//
//    new NonStrictExpectations() {{
//      fs.getFileBlockLocations(file, 0, 3*blockSize); result = blockLocations;
//      fs.getFileStatus(new Path(path)); result = file;
//      file.getLen(); result = 3*blockSize;
//    }};
//
//
//    BlockMapBuilder ac = new BlockMapBuilder(fs, endPoints);
//    for (ParquetGroupScan.RowGroupInfo rowGroup : rowGroups) {
//      ac.setEndpointBytes(rowGroup);
//    }
//    ParquetGroupScan.RowGroupInfo rg = rowGroups.get(0);
//    Long b = rg.getEndpointBytes().get(endPoints.get(0));
//    assertEquals(blockSize,b.longValue());
//    b = rg.getEndpointBytes().get(endPoints.get(3));
//    assertNull(b);
//
//    buildRowGroups(rowGroups, numberOfBlocks, blockSize, 2);
//
//    ac = new BlockMapBuilder(fs, endPoints);
//    for (ParquetGroupScan.RowGroupInfo rowGroup : rowGroups) {
//      ac.setEndpointBytes(rowGroup);
//    }
//    rg = rowGroups.get(0);
//    b = rg.getEndpointBytes().get(endPoints.get(0));
//    assertEquals(blockSize*3/2,b.longValue());
//    b = rg.getEndpointBytes().get(endPoints.get(3));
//    assertEquals(blockSize / 2, b.longValue());
//
//    buildRowGroups(rowGroups, numberOfBlocks, blockSize, 6);
//
//    ac = new BlockMapBuilder(fs, endPoints);
//    for (ParquetGroupScan.RowGroupInfo rowGroup : rowGroups) {
//      ac.setEndpointBytes(rowGroup);
//    }
//    rg = rowGroups.get(0);
//    b = rg.getEndpointBytes().get(endPoints.get(0));
//    assertEquals(blockSize/2,b.longValue());
//    b = rg.getEndpointBytes().get(endPoints.get(3));
//    assertNull(b);
//  }
@Test
public void testBuildRangeMap() {
    BlockLocation[] blocks = buildBlockLocations(new String[4], 256 * 1024 * 1024);
    long tA = System.nanoTime();
    ImmutableRangeMap.Builder<Long, BlockLocation> blockMapBuilder = new ImmutableRangeMap.Builder<Long, BlockLocation>();
    for (BlockLocation block : blocks) {
        long start = block.getOffset();
        long end = start + block.getLength();
        Range<Long> range = Range.closedOpen(start, end);
        blockMapBuilder = blockMapBuilder.put(range, block);
    }
    ImmutableRangeMap<Long, BlockLocation> map = blockMapBuilder.build();
    long tB = System.nanoTime();
    System.out.println(String.format("Took %f ms to build range map", (tB - tA) / 1e6));
}
Also used : ImmutableRangeMap(com.google.common.collect.ImmutableRangeMap) BlockLocation(org.apache.hadoop.fs.BlockLocation) Test(org.junit.Test) ExecTest(org.apache.drill.exec.ExecTest)
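
For context, the range map built above exists so that the block containing a given byte offset can be looked up directly. A minimal sketch of that lookup, assuming Guava is on the classpath; findBlock is a hypothetical helper, not part of Drill:

import com.google.common.collect.ImmutableRangeMap;
import com.google.common.collect.Range;
import org.apache.hadoop.fs.BlockLocation;

// Hypothetical helper: build the same range map and return the block covering the given offset
static BlockLocation findBlock(BlockLocation[] blocks, long offset) {
    ImmutableRangeMap.Builder<Long, BlockLocation> builder = new ImmutableRangeMap.Builder<>();
    for (BlockLocation block : blocks) {
        // Each block owns the half-open byte range [offset, offset + length)
        builder.put(Range.closedOpen(block.getOffset(), block.getOffset() + block.getLength()), block);
    }
    ImmutableRangeMap<Long, BlockLocation> map = builder.build();
    // RangeMap.get returns the mapped block, or null if no range covers the offset
    return map.get(offset);
}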

Example 78 with BlockLocation

Use of org.apache.hadoop.fs.BlockLocation in project asterixdb by apache.

From the class HDFSUtils, the method getSplits.

/**
     * Instead of creating the splits using the input format, we create them manually.
     * This function returns one file split per HDFS file block, irrespective of the number of partitions,
     * and the produced splits only cover the intersection between the current files in HDFS and the files
     * stored internally in AsterixDB.
     * 1. NoOp means an appended file
     * 2. AddOp means a new file
     * 3. UpdateOp means the delta of a file
     *
     * @return the file splits covering the requested external files
     * @throws IOException
     */
public static InputSplit[] getSplits(JobConf conf, List<ExternalFile> files) throws IOException {
    // Create file system object
    FileSystem fs = FileSystem.get(conf);
    ArrayList<FileSplit> fileSplits = new ArrayList<>();
    ArrayList<ExternalFile> orderedExternalFiles = new ArrayList<>();
    // Create files splits
    for (ExternalFile file : files) {
        Path filePath = new Path(file.getFileName());
        FileStatus fileStatus;
        try {
            fileStatus = fs.getFileStatus(filePath);
        } catch (FileNotFoundException e) {
            // file was deleted at some point, skip to next file
            continue;
        }
        if (file.getPendingOp() == ExternalFilePendingOp.ADD_OP && fileStatus.getModificationTime() == file.getLastModefiedTime().getTime()) {
            // Get its information from HDFS name node
            BlockLocation[] fileBlocks = fs.getFileBlockLocations(fileStatus, 0, file.getSize());
            // Create a split per block
            for (BlockLocation block : fileBlocks) {
                if (block.getOffset() < file.getSize()) {
                    fileSplits.add(new FileSplit(filePath, block.getOffset(), (block.getLength() + block.getOffset()) < file.getSize() ? block.getLength() : (file.getSize() - block.getOffset()), block.getHosts()));
                    orderedExternalFiles.add(file);
                }
            }
        } else if (file.getPendingOp() == ExternalFilePendingOp.NO_OP && fileStatus.getModificationTime() == file.getLastModefiedTime().getTime()) {
            long oldSize = 0L;
            long newSize = file.getSize();
            for (int i = 0; i < files.size(); i++) {
                if (files.get(i).getFileName().equals(file.getFileName()) && files.get(i).getSize() != file.getSize()) {
                    newSize = files.get(i).getSize();
                    oldSize = file.getSize();
                    break;
                }
            }
            // Get its information from HDFS name node
            BlockLocation[] fileBlocks = fs.getFileBlockLocations(fileStatus, 0, newSize);
            // Create a split per block
            for (BlockLocation block : fileBlocks) {
                if (block.getOffset() + block.getLength() > oldSize) {
                    if (block.getOffset() < newSize) {
                        // Block intersects with the delta -> create a split
                        long startCut = (block.getOffset() > oldSize) ? 0L : oldSize - block.getOffset();
                        long endCut = (block.getOffset() + block.getLength() < newSize) ? 0L : block.getOffset() + block.getLength() - newSize;
                        long splitLength = block.getLength() - startCut - endCut;
                        fileSplits.add(new FileSplit(filePath, block.getOffset() + startCut, splitLength, block.getHosts()));
                        orderedExternalFiles.add(file);
                    }
                }
            }
        }
    }
    fs.close();
    files.clear();
    files.addAll(orderedExternalFiles);
    return fileSplits.toArray(new FileSplit[fileSplits.size()]);
}
Also used : Path(org.apache.hadoop.fs.Path) FileStatus(org.apache.hadoop.fs.FileStatus) FileSystem(org.apache.hadoop.fs.FileSystem) ArrayList(java.util.ArrayList) FileNotFoundException(java.io.FileNotFoundException) FileSplit(org.apache.hadoop.mapred.FileSplit) BlockLocation(org.apache.hadoop.fs.BlockLocation) ExternalFile(org.apache.asterix.external.indexing.ExternalFile)
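
In the ADD_OP branch, the length passed to each FileSplit is simply the block length clamped to the size recorded for the file. A small sketch making that arithmetic explicit; splitLengthFor is a hypothetical helper, not part of HDFSUtils:

import org.apache.hadoop.fs.BlockLocation;

// Hypothetical helper: clamp a block to the size recorded for the external file
static long splitLengthFor(BlockLocation block, long fileSize) {
    // Whole block lies inside the recorded size -> use the full block length
    if (block.getOffset() + block.getLength() < fileSize) {
        return block.getLength();
    }
    // Last block may extend past the recorded size -> only cover up to fileSize
    return fileSize - block.getOffset();
}

// Equivalent to Math.min(block.getLength(), fileSize - block.getOffset()) for blocks starting before fileSize.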

Example 79 with BlockLocation

Use of org.apache.hadoop.fs.BlockLocation in project hadoop by apache.

From the class NativeAzureFileSystem, the method getFileBlockLocations.

/**
   * Return an array containing hostnames, offset and size of
   * portions of the given file. For WASB we'll just lie and give
   * fake hosts to make sure we get many splits in MR jobs.
   */
@Override
public BlockLocation[] getFileBlockLocations(FileStatus file, long start, long len) throws IOException {
    if (file == null) {
        return null;
    }
    if ((start < 0) || (len < 0)) {
        throw new IllegalArgumentException("Invalid start or len parameter");
    }
    if (file.getLen() < start) {
        return new BlockLocation[0];
    }
    final String blobLocationHost = getConf().get(AZURE_BLOCK_LOCATION_HOST_PROPERTY_NAME, AZURE_BLOCK_LOCATION_HOST_DEFAULT);
    final String[] name = { blobLocationHost };
    final String[] host = { blobLocationHost };
    long blockSize = file.getBlockSize();
    if (blockSize <= 0) {
        throw new IllegalArgumentException("The block size for the given file is not a positive number: " + blockSize);
    }
    int numberOfLocations = (int) (len / blockSize) + ((len % blockSize == 0) ? 0 : 1);
    BlockLocation[] locations = new BlockLocation[numberOfLocations];
    for (int i = 0; i < locations.length; i++) {
        long currentOffset = start + (i * blockSize);
        long currentLength = Math.min(blockSize, start + len - currentOffset);
        locations[i] = new BlockLocation(name, host, currentOffset, currentLength);
    }
    return locations;
}
Also used : BlockLocation(org.apache.hadoop.fs.BlockLocation)
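
The number of synthetic locations is effectively ceil(len / blockSize), with only the final location shortened so the array covers exactly [start, start + len). A minimal sketch of the same arithmetic in isolation; countLocations is a hypothetical helper, not part of NativeAzureFileSystem:

// Hypothetical helper mirroring the loop above: how many fake locations cover len bytes
static int countLocations(long len, long blockSize) {
    // Equivalent to (int) Math.ceil((double) len / blockSize) for positive inputs
    return (int) (len / blockSize) + ((len % blockSize == 0) ? 0 : 1);
}

// e.g. countLocations(300L << 20, 128L << 20) == 3; the final location is only 44 MB long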

Example 80 with BlockLocation

Use of org.apache.hadoop.fs.BlockLocation in project hadoop by apache.

From the class TestNativeAzureFileSystemBlockLocations, the method getBlockLocationsOutput.

private static BlockLocation[] getBlockLocationsOutput(int fileSize, int blockSize, long start, long len, String blockLocationHost) throws Exception {
    Configuration conf = new Configuration();
    conf.set(NativeAzureFileSystem.AZURE_BLOCK_SIZE_PROPERTY_NAME, "" + blockSize);
    if (blockLocationHost != null) {
        conf.set(NativeAzureFileSystem.AZURE_BLOCK_LOCATION_HOST_PROPERTY_NAME, blockLocationHost);
    }
    AzureBlobStorageTestAccount testAccount = AzureBlobStorageTestAccount.createMock(conf);
    FileSystem fs = testAccount.getFileSystem();
    Path testFile = createTestFile(fs, fileSize);
    FileStatus stat = fs.getFileStatus(testFile);
    BlockLocation[] locations = fs.getFileBlockLocations(stat, start, len);
    testAccount.cleanup();
    return locations;
}
Also used : Path(org.apache.hadoop.fs.Path) FileStatus(org.apache.hadoop.fs.FileStatus) Configuration(org.apache.hadoop.conf.Configuration) FileSystem(org.apache.hadoop.fs.FileSystem) BlockLocation(org.apache.hadoop.fs.BlockLocation)
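
A test built on this helper only needs to assert the shape of the returned array. A hypothetical example with illustrative sizes (not taken from the actual test class):

@Test
public void testBlockLocationsCoverWholeFile() throws Exception {
    // 210-byte file, 50-byte fs.azure.block.size, read the whole file from offset 0
    BlockLocation[] locations = getBlockLocationsOutput(210, 50, 0, 210, null);
    // ceil(210 / 50) = 5 locations; the last one covers only the trailing 10 bytes
    assertEquals(5, locations.length);
    assertEquals(200, locations[4].getOffset());
    assertEquals(10, locations[4].getLength());
}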

Aggregations

BlockLocation (org.apache.hadoop.fs.BlockLocation): 88
Path (org.apache.hadoop.fs.Path): 41
FileStatus (org.apache.hadoop.fs.FileStatus): 30
Test (org.junit.Test): 29
FileSystem (org.apache.hadoop.fs.FileSystem): 16
ArrayList (java.util.ArrayList): 14
Configuration (org.apache.hadoop.conf.Configuration): 14
IOException (java.io.IOException): 10
FSDataOutputStream (org.apache.hadoop.fs.FSDataOutputStream): 10
DatanodeInfo (org.apache.hadoop.hdfs.protocol.DatanodeInfo): 7
DataNode (org.apache.hadoop.hdfs.server.datanode.DataNode): 7
InetSocketAddress (java.net.InetSocketAddress): 5
FSDataInputStream (org.apache.hadoop.fs.FSDataInputStream): 5
LocatedFileStatus (org.apache.hadoop.fs.LocatedFileStatus): 5
HdfsConfiguration (org.apache.hadoop.hdfs.HdfsConfiguration): 5
LocatedBlocks (org.apache.hadoop.hdfs.protocol.LocatedBlocks): 5
IgfsBlockLocation (org.apache.ignite.igfs.IgfsBlockLocation): 5
IgfsPath (org.apache.ignite.igfs.IgfsPath): 5
HashMap (java.util.HashMap): 4
Random (java.util.Random): 4