
Example 51 with BlockLocation

use of org.apache.hadoop.fs.BlockLocation in project druid by druid-io.

the class DatasourceInputFormatTest method setUp.

@Before
public void setUp() throws Exception {
    segments = ImmutableList.of(
        WindowedDataSegment.of(new DataSegment("test1", Interval.parse("2000/3000"), "ver", ImmutableMap.<String, Object>of("type", "local", "path", "/tmp/index1.zip"), ImmutableList.of("host"), ImmutableList.of("visited_sum", "unique_hosts"), NoneShardSpec.instance(), 9, 2)),
        WindowedDataSegment.of(new DataSegment("test2", Interval.parse("2050/3000"), "ver", ImmutableMap.<String, Object>of("type", "hdfs", "path", "/tmp/index2.zip"), ImmutableList.of("host"), ImmutableList.of("visited_sum", "unique_hosts"), NoneShardSpec.instance(), 9, 11)),
        WindowedDataSegment.of(new DataSegment("test3", Interval.parse("2030/3000"), "ver", ImmutableMap.<String, Object>of("type", "hdfs", "path", "/tmp/index3.zip"), ImmutableList.of("host"), ImmutableList.of("visited_sum", "unique_hosts"), NoneShardSpec.instance(), 9, 4)));
    Path path1 = new Path(JobHelper.getURIFromSegment(segments.get(0).getSegment()));
    Path path2 = new Path(JobHelper.getURIFromSegment(segments.get(1).getSegment()));
    Path path3 = new Path(JobHelper.getURIFromSegment(segments.get(2).getSegment()));
    // dummy locations for test
    locations = ImmutableList.of(
        new LocatedFileStatus(1000, false, 0, 0, 0, 0, null, null, null, null, path1, new BlockLocation[] { new BlockLocation(null, new String[] { "s1", "s2" }, 0, 600), new BlockLocation(null, new String[] { "s2", "s3" }, 600, 400) }),
        new LocatedFileStatus(4000, false, 0, 0, 0, 0, null, null, null, null, path2, new BlockLocation[] { new BlockLocation(null, new String[] { "s1", "s2" }, 0, 1000), new BlockLocation(null, new String[] { "s1", "s3" }, 1000, 1200), new BlockLocation(null, new String[] { "s2", "s3" }, 2200, 1100), new BlockLocation(null, new String[] { "s1", "s2" }, 3300, 700) }),
        new LocatedFileStatus(500, false, 0, 0, 0, 0, null, null, null, null, path3, new BlockLocation[] { new BlockLocation(null, new String[] { "s2", "s3" }, 0, 500) }));
    config = new JobConf();
    config.set(DatasourceInputFormat.CONF_INPUT_SEGMENTS, new DefaultObjectMapper().writeValueAsString(segments));
    context = EasyMock.createMock(JobContext.class);
    EasyMock.expect(context.getConfiguration()).andReturn(config);
    EasyMock.replay(context);
}
Also used : Path(org.apache.hadoop.fs.Path) LocatedFileStatus(org.apache.hadoop.fs.LocatedFileStatus) DefaultObjectMapper(io.druid.jackson.DefaultObjectMapper) JobContext(org.apache.hadoop.mapreduce.JobContext) BlockLocation(org.apache.hadoop.fs.BlockLocation) DataSegment(io.druid.timeline.DataSegment) JobConf(org.apache.hadoop.mapred.JobConf) Before(org.junit.Before)
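
The test above hand-builds LocatedFileStatus and BlockLocation instances as stand-ins for real HDFS metadata. For comparison, a minimal sketch of how the same block information is normally obtained from a live FileSystem; the path is illustrative (borrowed from the test's segment locations) and not a real file:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.BlockLocation;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class BlockLocationLookup {
    public static void main(String[] args) throws Exception {
        FileSystem fs = FileSystem.get(new Configuration());
        // illustrative path; replace with a real file on the target cluster
        FileStatus status = fs.getFileStatus(new Path("/tmp/index1.zip"));
        // fetch block metadata for the whole file
        BlockLocation[] blocks = fs.getFileBlockLocations(status, 0, status.getLen());
        for (BlockLocation block : blocks) {
            System.out.printf("offset=%d length=%d hosts=%s%n",
                block.getOffset(), block.getLength(), String.join(",", block.getHosts()));
        }
    }
}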

Example 52 with BlockLocation

use of org.apache.hadoop.fs.BlockLocation in project presto by prestodb.

the class BackgroundHiveSplitLoader method createHiveSplitIterator.

private Iterator<HiveSplit> createHiveSplitIterator(
        String partitionName,
        String path,
        BlockLocation[] blockLocations,
        long start,
        long length,
        Properties schema,
        List<HivePartitionKey> partitionKeys,
        boolean splittable,
        ConnectorSession session,
        OptionalInt bucketNumber,
        TupleDomain<HiveColumnHandle> effectivePredicate,
        Map<Integer, HiveType> columnCoercions) throws IOException {
    boolean forceLocalScheduling = HiveSessionProperties.isForceLocalScheduling(session);
    if (splittable) {
        PeekingIterator<BlockLocation> blockLocationIterator = Iterators.peekingIterator(Arrays.stream(blockLocations).iterator());
        return new AbstractIterator<HiveSplit>() {

            private long chunkOffset = 0;

            @Override
            protected HiveSplit computeNext() {
                if (!blockLocationIterator.hasNext()) {
                    return endOfData();
                }
                BlockLocation blockLocation = blockLocationIterator.peek();
                List<HostAddress> addresses;
                try {
                    addresses = toHostAddress(blockLocation.getHosts());
                } catch (IOException e) {
                    throw Throwables.propagate(e);
                }
                long targetChunkSize;
                if (remainingInitialSplits.decrementAndGet() >= 0) {
                    targetChunkSize = maxInitialSplitSize.toBytes();
                } else {
                    long maxBytes = maxSplitSize.toBytes();
                    int chunks = toIntExact((long) Math.ceil((blockLocation.getLength() - chunkOffset) * 1.0 / maxBytes));
                    targetChunkSize = (long) Math.ceil((blockLocation.getLength() - chunkOffset) * 1.0 / chunks);
                }
                // adjust the actual chunk size to account for the overrun when chunks are slightly bigger than necessary (see above)
                long chunkLength = Math.min(targetChunkSize, blockLocation.getLength() - chunkOffset);
                HiveSplit result = new HiveSplit(connectorId, table.getDatabaseName(), table.getTableName(), partitionName, path, blockLocation.getOffset() + chunkOffset, chunkLength, schema, partitionKeys, addresses, bucketNumber, forceLocalScheduling && hasRealAddress(addresses), effectivePredicate, columnCoercions);
                chunkOffset += chunkLength;
                if (chunkOffset >= blockLocation.getLength()) {
                    checkState(chunkOffset == blockLocation.getLength(), "Error splitting blocks");
                    blockLocationIterator.next();
                    chunkOffset = 0;
                }
                return result;
            }
        };
    } else {
        // not splittable, use the hosts from the first block if it exists
        List<HostAddress> addresses = ImmutableList.of();
        if (blockLocations.length > 0) {
            addresses = toHostAddress(blockLocations[0].getHosts());
        }
        return Iterators.singletonIterator(new HiveSplit(connectorId, table.getDatabaseName(), table.getTableName(), partitionName, path, start, length, schema, partitionKeys, addresses, bucketNumber, forceLocalScheduling && hasRealAddress(addresses), effectivePredicate, columnCoercions));
    }
}
Also used : AbstractIterator(com.google.common.collect.AbstractIterator) IOException(java.io.IOException) BlockLocation(org.apache.hadoop.fs.BlockLocation) HostAddress(com.facebook.presto.spi.HostAddress)
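
The iterator above slices each block into chunks no larger than maxSplitSize, rounding so that the chunks come out nearly equal rather than leaving a tiny trailing chunk. A standalone sketch of that arithmetic, with the surrounding Presto state (remainingInitialSplits, maxInitialSplitSize) stripped out and the names chosen here purely for illustration:

import static java.lang.Math.toIntExact;

public final class ChunkSizing {
    // Mirrors the chunk-size computation in createHiveSplitIterator: divide what is
    // left of the block into the smallest number of chunks that each fit under
    // maxBytes, then size the chunks evenly.
    static long targetChunkSize(long blockLength, long chunkOffset, long maxBytes) {
        long remaining = blockLength - chunkOffset;
        int chunks = toIntExact((long) Math.ceil(remaining * 1.0 / maxBytes));
        return (long) Math.ceil(remaining * 1.0 / chunks);
    }

    public static void main(String[] args) {
        // a 3300-byte remainder with a 1000-byte cap is cut into 4 even chunks; prints 825
        System.out.println(targetChunkSize(3300, 0, 1000));
    }
}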

Example 53 with BlockLocation

use of org.apache.hadoop.fs.BlockLocation in project hadoop by apache.

the class TestFavoredNodesEndToEnd method testFavoredNodesEndToEnd.

@Test(timeout = 180000)
public void testFavoredNodesEndToEnd() throws Exception {
    //create 10 files with random preferred nodes
    for (int i = 0; i < NUM_FILES; i++) {
        Random rand = new Random(System.currentTimeMillis() + i);
        //pass a newly created rand so as to get a uniform distribution each time
        //without too many collisions (look at the do-while loop in getDatanodes)
        InetSocketAddress[] datanode = getDatanodes(rand);
        Path p = new Path("/filename" + i);
        FSDataOutputStream out = dfs.create(p, FsPermission.getDefault(), true, 4096, (short) 3, 4096L, null, datanode);
        out.write(SOME_BYTES);
        out.close();
        BlockLocation[] locations = getBlockLocations(p);
        //verify the files got created in the right nodes
        for (BlockLocation loc : locations) {
            String[] hosts = loc.getNames();
            String[] hosts1 = getStringForInetSocketAddrs(datanode);
            assertTrue(compareNodes(hosts, hosts1));
        }
    }
}
Also used : Path(org.apache.hadoop.fs.Path) Random(java.util.Random) InetSocketAddress(java.net.InetSocketAddress) FSDataOutputStream(org.apache.hadoop.fs.FSDataOutputStream) BlockLocation(org.apache.hadoop.fs.BlockLocation) Test(org.junit.Test)
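
The helpers getBlockLocations, getStringForInetSocketAddrs, and compareNodes are not shown in this excerpt. A hedged sketch of plausible implementations, assuming dfs is a FileSystem handle and that favored nodes are matched by their "host:port" transfer addresses (which is how Example 55 below formats them); the actual helpers in TestFavoredNodesEndToEnd may differ:

import java.net.InetSocketAddress;
import java.util.Arrays;
import java.util.HashSet;
import java.util.Set;
import org.apache.hadoop.fs.BlockLocation;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

final class FavoredNodesTestHelpers {
    // all block locations for the file, from start to end
    static BlockLocation[] getBlockLocations(FileSystem dfs, Path p) throws Exception {
        return dfs.getFileBlockLocations(dfs.getFileStatus(p), 0, Long.MAX_VALUE);
    }

    // render each favored-node address as "host:port", matching BlockLocation.getNames()
    static String[] getStringForInetSocketAddrs(InetSocketAddress[] addrs) {
        String[] strs = new String[addrs.length];
        for (int i = 0; i < addrs.length; i++) {
            strs[i] = addrs[i].getAddress().getHostAddress() + ":" + addrs[i].getPort();
        }
        return strs;
    }

    // true when every replica host of the block is one of the favored nodes
    static boolean compareNodes(String[] blockHosts, String[] favoredHosts) {
        Set<String> favored = new HashSet<>(Arrays.asList(favoredHosts));
        return favored.containsAll(Arrays.asList(blockHosts));
    }
}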

Example 54 with BlockLocation

use of org.apache.hadoop.fs.BlockLocation in project hadoop by apache.

the class TestFavoredNodesEndToEnd method testFavoredNodesEndToEndForAppend.

@Test(timeout = 180000)
public void testFavoredNodesEndToEndForAppend() throws Exception {
    // create 10 files with random preferred nodes
    for (int i = 0; i < NUM_FILES; i++) {
        Random rand = new Random(System.currentTimeMillis() + i);
        // pass a newly created rand so as to get a uniform distribution each time
        // without too many collisions (look at the do-while loop in getDatanodes)
        InetSocketAddress[] datanode = getDatanodes(rand);
        Path p = new Path("/filename" + i);
        // create and close the file.
        dfs.create(p, FsPermission.getDefault(), true, 4096, (short) 3, 4096L, null, null).close();
        // re-open for append
        FSDataOutputStream out = dfs.append(p, EnumSet.of(CreateFlag.APPEND), 4096, null, datanode);
        out.write(SOME_BYTES);
        out.close();
        BlockLocation[] locations = getBlockLocations(p);
        // verify the files got created in the right nodes
        for (BlockLocation loc : locations) {
            String[] hosts = loc.getNames();
            String[] hosts1 = getStringForInetSocketAddrs(datanode);
            assertTrue(compareNodes(hosts, hosts1));
        }
    }
}
Also used : Path(org.apache.hadoop.fs.Path) Random(java.util.Random) InetSocketAddress(java.net.InetSocketAddress) FSDataOutputStream(org.apache.hadoop.fs.FSDataOutputStream) BlockLocation(org.apache.hadoop.fs.BlockLocation) Test(org.junit.Test)

Example 55 with BlockLocation

use of org.apache.hadoop.fs.BlockLocation in project hadoop by apache.

the class TestFavoredNodesEndToEnd method testWhenSomeNodesAreNotGood.

@Test(timeout = 180000)
public void testWhenSomeNodesAreNotGood() throws Exception {
    // 4 favored nodes
    final InetSocketAddress[] addrs = new InetSocketAddress[4];
    final String[] hosts = new String[addrs.length];
    for (int i = 0; i < addrs.length; i++) {
        addrs[i] = datanodes.get(i).getXferAddress();
        hosts[i] = addrs[i].getAddress().getHostAddress() + ":" + addrs[i].getPort();
    }
    //make some datanode not "good" so that even if the client prefers it,
    //the namenode would not give it as a replica to write to
    DatanodeInfo d = cluster.getNameNode().getNamesystem().getBlockManager().getDatanodeManager().getDatanodeByXferAddr(addrs[0].getAddress().getHostAddress(), addrs[0].getPort());
    //set the decommission status to true so that 
    //BlockPlacementPolicyDefault.isGoodTarget returns false for this dn
    d.setDecommissioned();
    Path p = new Path("/filename-foo-bar-baz");
    final short replication = (short) 3;
    FSDataOutputStream out = dfs.create(p, FsPermission.getDefault(), true, 4096, replication, 4096L, null, addrs);
    out.write(SOME_BYTES);
    out.close();
    //reset the state
    d.stopDecommission();
    BlockLocation[] locations = getBlockLocations(p);
    Assert.assertEquals(replication, locations[0].getNames().length);
    //also make sure that the decommissioned datanode (addrs[0]) is not among the replica hosts
    for (int i = 0; i < replication; i++) {
        final String loc = locations[0].getNames()[i];
        int j = 0;
        for (; j < hosts.length && !loc.equals(hosts[j]); j++) ;
        Assert.assertTrue("j=" + j, j > 0);
        Assert.assertTrue("loc=" + loc + " not in host list " + Arrays.asList(hosts) + ", j=" + j, j < hosts.length);
    }
}
Also used : Path(org.apache.hadoop.fs.Path) DatanodeInfo(org.apache.hadoop.hdfs.protocol.DatanodeInfo) InetSocketAddress(java.net.InetSocketAddress) FSDataOutputStream(org.apache.hadoop.fs.FSDataOutputStream) BlockLocation(org.apache.hadoop.fs.BlockLocation) Test(org.junit.Test)
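
The final loop asserts, somewhat indirectly, that every replica host appears in the favored list but is never the decommissioned node at index 0. A more direct formulation of the same check, assuming the same hosts and locations variables from the test plus a java.util.List import:

// assumes hosts, locations, and the test's Assert/Arrays imports are in scope
List<String> favored = Arrays.asList(hosts);
for (String replicaHost : locations[0].getNames()) {
    int j = favored.indexOf(replicaHost);
    // every replica must come from the favored list...
    Assert.assertTrue("loc=" + replicaHost + " not in host list " + favored, j >= 0);
    // ...but never from the decommissioned node at index 0
    Assert.assertTrue("replica placed on decommissioned node " + replicaHost, j > 0);
}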

Aggregations

BlockLocation (org.apache.hadoop.fs.BlockLocation): 88 uses
Path (org.apache.hadoop.fs.Path): 41 uses
FileStatus (org.apache.hadoop.fs.FileStatus): 30 uses
Test (org.junit.Test): 29 uses
FileSystem (org.apache.hadoop.fs.FileSystem): 16 uses
ArrayList (java.util.ArrayList): 14 uses
Configuration (org.apache.hadoop.conf.Configuration): 14 uses
IOException (java.io.IOException): 10 uses
FSDataOutputStream (org.apache.hadoop.fs.FSDataOutputStream): 10 uses
DatanodeInfo (org.apache.hadoop.hdfs.protocol.DatanodeInfo): 7 uses
DataNode (org.apache.hadoop.hdfs.server.datanode.DataNode): 7 uses
InetSocketAddress (java.net.InetSocketAddress): 5 uses
FSDataInputStream (org.apache.hadoop.fs.FSDataInputStream): 5 uses
LocatedFileStatus (org.apache.hadoop.fs.LocatedFileStatus): 5 uses
HdfsConfiguration (org.apache.hadoop.hdfs.HdfsConfiguration): 5 uses
LocatedBlocks (org.apache.hadoop.hdfs.protocol.LocatedBlocks): 5 uses
IgfsBlockLocation (org.apache.ignite.igfs.IgfsBlockLocation): 5 uses
IgfsPath (org.apache.ignite.igfs.IgfsPath): 5 uses
HashMap (java.util.HashMap): 4 uses
Random (java.util.Random): 4 uses