Use of org.apache.hadoop.fs.BlockLocation in project druid by druid-io.
The class DatasourceInputFormatTest, method setUp.
@Before
public void setUp() throws Exception {
segments = ImmutableList.of(
    WindowedDataSegment.of(new DataSegment("test1", Interval.parse("2000/3000"), "ver",
        ImmutableMap.<String, Object>of("type", "local", "path", "/tmp/index1.zip"),
        ImmutableList.of("host"), ImmutableList.of("visited_sum", "unique_hosts"),
        NoneShardSpec.instance(), 9, 2)),
    WindowedDataSegment.of(new DataSegment("test2", Interval.parse("2050/3000"), "ver",
        ImmutableMap.<String, Object>of("type", "hdfs", "path", "/tmp/index2.zip"),
        ImmutableList.of("host"), ImmutableList.of("visited_sum", "unique_hosts"),
        NoneShardSpec.instance(), 9, 11)),
    WindowedDataSegment.of(new DataSegment("test3", Interval.parse("2030/3000"), "ver",
        ImmutableMap.<String, Object>of("type", "hdfs", "path", "/tmp/index3.zip"),
        ImmutableList.of("host"), ImmutableList.of("visited_sum", "unique_hosts"),
        NoneShardSpec.instance(), 9, 4)));
Path path1 = new Path(JobHelper.getURIFromSegment(segments.get(0).getSegment()));
Path path2 = new Path(JobHelper.getURIFromSegment(segments.get(1).getSegment()));
Path path3 = new Path(JobHelper.getURIFromSegment(segments.get(2).getSegment()));
// dummy locations for test
locations = ImmutableList.of(
    new LocatedFileStatus(1000, false, 0, 0, 0, 0, null, null, null, null, path1,
        new BlockLocation[] {
            new BlockLocation(null, new String[] { "s1", "s2" }, 0, 600),
            new BlockLocation(null, new String[] { "s2", "s3" }, 600, 400)
        }),
    new LocatedFileStatus(4000, false, 0, 0, 0, 0, null, null, null, null, path2,
        new BlockLocation[] {
            new BlockLocation(null, new String[] { "s1", "s2" }, 0, 1000),
            new BlockLocation(null, new String[] { "s1", "s3" }, 1000, 1200),
            new BlockLocation(null, new String[] { "s2", "s3" }, 2200, 1100),
            new BlockLocation(null, new String[] { "s1", "s2" }, 3300, 700)
        }),
    new LocatedFileStatus(500, false, 0, 0, 0, 0, null, null, null, null, path3,
        new BlockLocation[] {
            new BlockLocation(null, new String[] { "s2", "s3" }, 0, 500)
        }));
config = new JobConf();
config.set(DatasourceInputFormat.CONF_INPUT_SEGMENTS, new DefaultObjectMapper().writeValueAsString(segments));
context = EasyMock.createMock(JobContext.class);
EasyMock.expect(context.getConfiguration()).andReturn(config);
EasyMock.replay(context);
}
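Each LocatedFileStatus above pairs a segment path with hand-built BlockLocation entries: the second constructor argument is the list of hosts holding the block, and the last two arguments are the block's byte offset and length. A minimal sketch (not part of the test) of reading those fields back through the standard BlockLocation getters, assuming the same imports as the test plus java.util.Arrays and java.io.IOException:

static void dumpBlocks(List<LocatedFileStatus> locations) throws IOException {
  // Print the dummy block metadata that the split computation will consume.
  // getHosts() declares IOException in this Hadoop version, hence the throws clause.
  for (LocatedFileStatus status : locations) {
    for (BlockLocation block : status.getBlockLocations()) {
      System.out.printf("path=%s hosts=%s offset=%d length=%d%n",
          status.getPath(), Arrays.toString(block.getHosts()), block.getOffset(), block.getLength());
    }
  }
}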
Use of org.apache.hadoop.fs.BlockLocation in project presto by prestodb.
The class BackgroundHiveSplitLoader, method createHiveSplitIterator.
private Iterator<HiveSplit> createHiveSplitIterator(String partitionName, String path, BlockLocation[] blockLocations,
    long start, long length, Properties schema, List<HivePartitionKey> partitionKeys, boolean splittable,
    ConnectorSession session, OptionalInt bucketNumber, TupleDomain<HiveColumnHandle> effectivePredicate,
    Map<Integer, HiveType> columnCoercions) throws IOException {
boolean forceLocalScheduling = HiveSessionProperties.isForceLocalScheduling(session);
if (splittable) {
PeekingIterator<BlockLocation> blockLocationIterator = Iterators.peekingIterator(Arrays.stream(blockLocations).iterator());
return new AbstractIterator<HiveSplit>() {
private long chunkOffset = 0;
@Override
protected HiveSplit computeNext() {
if (!blockLocationIterator.hasNext()) {
return endOfData();
}
BlockLocation blockLocation = blockLocationIterator.peek();
List<HostAddress> addresses;
try {
addresses = toHostAddress(blockLocation.getHosts());
} catch (IOException e) {
throw Throwables.propagate(e);
}
long targetChunkSize;
if (remainingInitialSplits.decrementAndGet() >= 0) {
targetChunkSize = maxInitialSplitSize.toBytes();
} else {
long maxBytes = maxSplitSize.toBytes();
int chunks = toIntExact((long) Math.ceil((blockLocation.getLength() - chunkOffset) * 1.0 / maxBytes));
targetChunkSize = (long) Math.ceil((blockLocation.getLength() - chunkOffset) * 1.0 / chunks);
}
// adjust the actual chunk size to account for the overrun when chunks are slightly bigger than necessary (see above)
long chunkLength = Math.min(targetChunkSize, blockLocation.getLength() - chunkOffset);
HiveSplit result = new HiveSplit(connectorId, table.getDatabaseName(), table.getTableName(), partitionName, path,
    blockLocation.getOffset() + chunkOffset, chunkLength, schema, partitionKeys, addresses, bucketNumber,
    forceLocalScheduling && hasRealAddress(addresses), effectivePredicate, columnCoercions);
chunkOffset += chunkLength;
if (chunkOffset >= blockLocation.getLength()) {
checkState(chunkOffset == blockLocation.getLength(), "Error splitting blocks");
blockLocationIterator.next();
chunkOffset = 0;
}
return result;
}
};
} else {
// not splittable, use the hosts from the first block if it exists
List<HostAddress> addresses = ImmutableList.of();
if (blockLocations.length > 0) {
addresses = toHostAddress(blockLocations[0].getHosts());
}
return Iterators.singletonIterator(new HiveSplit(connectorId, table.getDatabaseName(), table.getTableName(),
    partitionName, path, start, length, schema, partitionKeys, addresses, bucketNumber,
    forceLocalScheduling && hasRealAddress(addresses), effectivePredicate, columnCoercions));
}
}
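The chunking above first rounds up the number of chunks needed so that no chunk exceeds maxSplitSize, then spreads the remaining bytes of the block evenly across those chunks; the Math.min on chunkLength trims the final chunk to the block boundary. A standalone sketch of the same arithmetic with invented numbers (the 67 MB block and 32 MB max split size are assumptions for illustration only):

// Illustration of the chunk-size math used above; the block and split sizes are made up.
long blockLength = 67L * 1024 * 1024; // remaining bytes in the block (chunkOffset assumed 0)
long maxBytes = 32L * 1024 * 1024;    // maxSplitSize.toBytes()
int chunks = Math.toIntExact((long) Math.ceil(blockLength * 1.0 / maxBytes)); // 3
long targetChunkSize = (long) Math.ceil(blockLength * 1.0 / chunks);          // about 22.3 MB
System.out.println(chunks + " chunks of about " + targetChunkSize + " bytes each");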
Use of org.apache.hadoop.fs.BlockLocation in project hadoop by apache.
The class TestFavoredNodesEndToEnd, method testFavoredNodesEndToEnd.
@Test(timeout = 180000)
public void testFavoredNodesEndToEnd() throws Exception {
//create 10 files with random preferred nodes
for (int i = 0; i < NUM_FILES; i++) {
Random rand = new Random(System.currentTimeMillis() + i);
//pass a newly created rand so as to get a uniform distribution each time
//without too many collisions (look at the do-while loop in getDatanodes)
InetSocketAddress[] datanode = getDatanodes(rand);
Path p = new Path("/filename" + i);
FSDataOutputStream out = dfs.create(p, FsPermission.getDefault(), true, 4096, (short) 3, 4096L, null, datanode);
out.write(SOME_BYTES);
out.close();
BlockLocation[] locations = getBlockLocations(p);
//verify the files got created in the right nodes
for (BlockLocation loc : locations) {
String[] hosts = loc.getNames();
String[] hosts1 = getStringForInetSocketAddrs(datanode);
assertTrue(compareNodes(hosts, hosts1));
}
}
}
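getBlockLocations(p) is a helper defined elsewhere in TestFavoredNodesEndToEnd whose body is not shown here; a plausible sketch, assuming it simply asks the DistributedFileSystem for the locations covering the whole file:

// Hypothetical body for the getBlockLocations(p) helper used above (an assumption,
// not the actual test code): fetch the block locations for the entire file.
private BlockLocation[] getBlockLocations(Path p) throws IOException {
  FileStatus status = dfs.getFileStatus(p);
  return dfs.getFileBlockLocations(status, 0, status.getLen());
}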
Use of org.apache.hadoop.fs.BlockLocation in project hadoop by apache.
The class TestFavoredNodesEndToEnd, method testFavoredNodesEndToEndForAppend.
@Test(timeout = 180000)
public void testFavoredNodesEndToEndForAppend() throws Exception {
// create 10 files with random preferred nodes
for (int i = 0; i < NUM_FILES; i++) {
Random rand = new Random(System.currentTimeMillis() + i);
// pass a newly created rand so as to get a uniform distribution each time
// without too many collisions (look at the do-while loop in getDatanodes)
InetSocketAddress[] datanode = getDatanodes(rand);
Path p = new Path("/filename" + i);
// create and close the file.
dfs.create(p, FsPermission.getDefault(), true, 4096, (short) 3, 4096L, null, null).close();
// re-open for append
FSDataOutputStream out = dfs.append(p, EnumSet.of(CreateFlag.APPEND), 4096, null, datanode);
out.write(SOME_BYTES);
out.close();
BlockLocation[] locations = getBlockLocations(p);
// verify the files got created in the right nodes
for (BlockLocation loc : locations) {
String[] hosts = loc.getNames();
String[] hosts1 = getStringForInetSocketAddrs(datanode);
assertTrue(compareNodes(hosts, hosts1));
}
}
}
Use of org.apache.hadoop.fs.BlockLocation in project hadoop by apache.
The class TestFavoredNodesEndToEnd, method testWhenSomeNodesAreNotGood.
@Test(timeout = 180000)
public void testWhenSomeNodesAreNotGood() throws Exception {
// 4 favored nodes
final InetSocketAddress[] addrs = new InetSocketAddress[4];
final String[] hosts = new String[addrs.length];
for (int i = 0; i < addrs.length; i++) {
addrs[i] = datanodes.get(i).getXferAddress();
hosts[i] = addrs[i].getAddress().getHostAddress() + ":" + addrs[i].getPort();
}
//make some datanode not "good" so that even if the client prefers it,
//the namenode would not give it as a replica to write to
DatanodeInfo d = cluster.getNameNode().getNamesystem().getBlockManager().getDatanodeManager()
    .getDatanodeByXferAddr(addrs[0].getAddress().getHostAddress(), addrs[0].getPort());
//set the decommission status to true so that
//BlockPlacementPolicyDefault.isGoodTarget returns false for this dn
d.setDecommissioned();
Path p = new Path("/filename-foo-bar-baz");
final short replication = (short) 3;
FSDataOutputStream out = dfs.create(p, FsPermission.getDefault(), true, 4096, replication, 4096L, null, addrs);
out.write(SOME_BYTES);
out.close();
//reset the state
d.stopDecommission();
BlockLocation[] locations = getBlockLocations(p);
Assert.assertEquals(replication, locations[0].getNames().length);
//also make sure that the datanode[0] is not in the list of hosts
for (int i = 0; i < replication; i++) {
final String loc = locations[0].getNames()[i];
int j = 0;
// advance j to the index of loc in hosts, or to hosts.length if loc is absent
for (; j < hosts.length && !loc.equals(hosts[j]); j++) ;
Assert.assertTrue("j=" + j, j > 0);
Assert.assertTrue("loc=" + loc + " not in host list " + Arrays.asList(hosts) + ", j=" + j, j < hosts.length);
}
}
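The empty-bodied inner for loop scans hosts for loc; the assertions then require both that a match exists (j < hosts.length) and that it is not the decommissioned datanode at index 0 (j > 0). The same check written with an explicit index lookup, as a sketch only:

// Sketch of an equivalent verification (not the original test code).
int j = Arrays.asList(hosts).indexOf(loc);
Assert.assertTrue("loc=" + loc + " not in host list " + Arrays.asList(hosts), j >= 0);
Assert.assertTrue("loc=" + loc + " must not be on the decommissioned datanode", j > 0);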