use of org.apache.hadoop.fs.BlockLocation in project hadoop by apache.
the class TestFileAppend method testFailedAppendBlockRejection.
/**
 * Old replica of the block should not be accepted as valid for append/read
 */
@Test
public void testFailedAppendBlockRejection() throws Exception {
  Configuration conf = new HdfsConfiguration();
  conf.set("dfs.client.block.write.replace-datanode-on-failure.enable", "false");
  MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).numDataNodes(3).build();
  DistributedFileSystem fs = null;
  try {
    fs = cluster.getFileSystem();
    Path path = new Path("/test");
    FSDataOutputStream out = fs.create(path);
    out.writeBytes("hello\n");
    out.close();
    // stop one datanode
    DataNodeProperties dnProp = cluster.stopDataNode(0);
    String dnAddress = dnProp.datanode.getXferAddress().toString();
    if (dnAddress.startsWith("/")) {
      dnAddress = dnAddress.substring(1);
    }
    // append again to bump the generation stamps
    for (int i = 0; i < 2; i++) {
      out = fs.append(path);
      out.writeBytes("helloagain\n");
      out.close();
    }
    // re-open so the last block goes back to the under-construction state
    out = fs.append(path);
    cluster.restartDataNode(dnProp, true);
    // wait until the block report from the restarted datanode arrives
    Thread.sleep(2000);
    // check the block locations; they should not contain the restarted datanode
    BlockLocation[] locations = fs.getFileBlockLocations(path, 0, Long.MAX_VALUE);
    String[] names = locations[0].getNames();
    for (String node : names) {
      if (node.equals(dnAddress)) {
        fail("Failed append should not be present in latest block locations.");
      }
    }
    out.close();
  } finally {
    IOUtils.closeStream(fs);
    cluster.shutdown();
  }
}
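The substring(1) normalization above exists because of how the transfer address is rendered: InetSocketAddress.toString() typically prints an address without a hostname as "/ip:port", while BlockLocation.getNames() returns plain "ip:port" entries. A minimal standalone sketch (the loopback address and port 50010 are assumptions for illustration) shows the format being normalized:

import java.net.InetAddress;
import java.net.InetSocketAddress;

public class XferAddressFormatSketch {
  public static void main(String[] args) throws Exception {
    // An InetSocketAddress built from a raw InetAddress carries no hostname,
    // so toString() yields "/ip:port" on typical JDKs.
    InetSocketAddress addr =
        new InetSocketAddress(InetAddress.getByAddress(new byte[] { 127, 0, 0, 1 }), 50010);
    String raw = addr.toString();                                  // e.g. "/127.0.0.1:50010"
    String normalized = raw.startsWith("/") ? raw.substring(1) : raw;
    System.out.println(normalized);                                // "127.0.0.1:50010"
  }
}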
use of org.apache.hadoop.fs.BlockLocation in project drill by apache.
the class TestAffinityCalculator method testBuildRangeMap.
// @Test
// public void testSetEndpointBytes(@Injectable final FileSystem fs, @Injectable final FileStatus file) throws Throwable {
//   final long blockSize = 256 * 1024 * 1024;
//   LinkedList<ParquetGroupScan.RowGroupInfo> rowGroups = new LinkedList<>();
//   int numberOfHosts = 4;
//   int numberOfBlocks = 3;
//   String port = "1234";
//   String[] hosts = new String[numberOfHosts];
//
//   final BlockLocation[] blockLocations = buildBlockLocations(hosts, blockSize);
//   final LinkedList<CoordinationProtos.DrillbitEndpoint> endPoints = buildEndpoints(numberOfHosts);
//   buildRowGroups(rowGroups, numberOfBlocks, blockSize, 3);
//
//   new NonStrictExpectations() {{
//     fs.getFileBlockLocations(file, 0, 3 * blockSize); result = blockLocations;
//     fs.getFileStatus(new Path(path)); result = file;
//     file.getLen(); result = 3 * blockSize;
//   }};
//
//   BlockMapBuilder ac = new BlockMapBuilder(fs, endPoints);
//   for (ParquetGroupScan.RowGroupInfo rowGroup : rowGroups) {
//     ac.setEndpointBytes(rowGroup);
//   }
//   ParquetGroupScan.RowGroupInfo rg = rowGroups.get(0);
//   Long b = rg.getEndpointBytes().get(endPoints.get(0));
//   assertEquals(blockSize, b.longValue());
//   b = rg.getEndpointBytes().get(endPoints.get(3));
//   assertNull(b);
//
//   buildRowGroups(rowGroups, numberOfBlocks, blockSize, 2);
//
//   ac = new BlockMapBuilder(fs, endPoints);
//   for (ParquetGroupScan.RowGroupInfo rowGroup : rowGroups) {
//     ac.setEndpointBytes(rowGroup);
//   }
//   rg = rowGroups.get(0);
//   b = rg.getEndpointBytes().get(endPoints.get(0));
//   assertEquals(blockSize * 3 / 2, b.longValue());
//   b = rg.getEndpointBytes().get(endPoints.get(3));
//   assertEquals(blockSize / 2, b.longValue());
//
//   buildRowGroups(rowGroups, numberOfBlocks, blockSize, 6);
//
//   ac = new BlockMapBuilder(fs, endPoints);
//   for (ParquetGroupScan.RowGroupInfo rowGroup : rowGroups) {
//     ac.setEndpointBytes(rowGroup);
//   }
//   rg = rowGroups.get(0);
//   b = rg.getEndpointBytes().get(endPoints.get(0));
//   assertEquals(blockSize / 2, b.longValue());
//   b = rg.getEndpointBytes().get(endPoints.get(3));
//   assertNull(b);
// }
@Test
public void testBuildRangeMap() {
  BlockLocation[] blocks = buildBlockLocations(new String[4], 256 * 1024 * 1024);
  long tA = System.nanoTime();
  ImmutableRangeMap.Builder<Long, BlockLocation> blockMapBuilder = new ImmutableRangeMap.Builder<Long, BlockLocation>();
  for (BlockLocation block : blocks) {
    long start = block.getOffset();
    long end = start + block.getLength();
    Range<Long> range = Range.closedOpen(start, end);
    blockMapBuilder = blockMapBuilder.put(range, block);
  }
  ImmutableRangeMap<Long, BlockLocation> map = blockMapBuilder.build();
  long tB = System.nanoTime();
  System.out.println(String.format("Took %f ms to build range map", (tB - tA) / 1e6));
}
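The buildBlockLocations helper is not shown in the snippet above. A minimal sketch of what such a helper and a lookup against the finished range map might look like follows; the helper body, the host names, and the 50010 transfer port are assumptions for illustration, not the Drill test's actual code.

import org.apache.hadoop.fs.BlockLocation;
import com.google.common.collect.ImmutableRangeMap;
import com.google.common.collect.Range;

public class BlockRangeMapSketch {

  // Hypothetical stand-in for the test's buildBlockLocations helper: one
  // consecutive block of blockSize bytes per entry in hosts.
  static BlockLocation[] buildBlockLocations(String[] hosts, long blockSize) {
    BlockLocation[] blocks = new BlockLocation[hosts.length];
    for (int i = 0; i < hosts.length; i++) {
      hosts[i] = "host" + i;                      // assumed host naming
      String[] names = { hosts[i] + ":50010" };   // assumed transfer port
      blocks[i] = new BlockLocation(names, new String[] { hosts[i] }, i * blockSize, blockSize);
    }
    return blocks;
  }

  public static void main(String[] args) {
    long blockSize = 256L * 1024 * 1024;
    BlockLocation[] blocks = buildBlockLocations(new String[4], blockSize);

    // Same construction as in testBuildRangeMap above.
    ImmutableRangeMap.Builder<Long, BlockLocation> builder = new ImmutableRangeMap.Builder<Long, BlockLocation>();
    for (BlockLocation block : blocks) {
      builder.put(Range.closedOpen(block.getOffset(), block.getOffset() + block.getLength()), block);
    }
    ImmutableRangeMap<Long, BlockLocation> map = builder.build();

    // Any byte offset maps to the block that contains it; offsets at or past
    // the end return null because closedOpen ranges exclude their upper bound.
    System.out.println(map.get(blockSize + 1).getOffset());   // 268435456 (second block)
    System.out.println(map.get(4 * blockSize));               // null
  }
}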
use of org.apache.hadoop.fs.BlockLocation in project asterixdb by apache.
the class HDFSUtils method getSplits.
/**
 * Instead of creating the splits using the input format, we do it manually.
 * This function returns file splits (one per HDFS file block) irrespective of the number of partitions,
 * and the produced splits only cover the intersection between the files currently in HDFS and the files
 * stored internally in AsterixDB.
 * 1. NoOp means an appended file
 * 2. AddOp means a new file
 * 3. UpdateOp means the delta of a file
 *
 * @return an array of file splits covering the external files
 * @throws IOException
 */
public static InputSplit[] getSplits(JobConf conf, List<ExternalFile> files) throws IOException {
  // Create file system object
  FileSystem fs = FileSystem.get(conf);
  ArrayList<FileSplit> fileSplits = new ArrayList<>();
  ArrayList<ExternalFile> orderedExternalFiles = new ArrayList<>();
  // Create file splits
  for (ExternalFile file : files) {
    Path filePath = new Path(file.getFileName());
    FileStatus fileStatus;
    try {
      fileStatus = fs.getFileStatus(filePath);
    } catch (FileNotFoundException e) {
      // file was deleted at some point, skip to next file
      continue;
    }
    if (file.getPendingOp() == ExternalFilePendingOp.ADD_OP
        && fileStatus.getModificationTime() == file.getLastModefiedTime().getTime()) {
      // Get its information from the HDFS name node
      BlockLocation[] fileBlocks = fs.getFileBlockLocations(fileStatus, 0, file.getSize());
      // Create a split per block
      for (BlockLocation block : fileBlocks) {
        if (block.getOffset() < file.getSize()) {
          fileSplits.add(new FileSplit(filePath, block.getOffset(),
              (block.getLength() + block.getOffset()) < file.getSize()
                  ? block.getLength() : (file.getSize() - block.getOffset()),
              block.getHosts()));
          orderedExternalFiles.add(file);
        }
      }
    } else if (file.getPendingOp() == ExternalFilePendingOp.NO_OP
        && fileStatus.getModificationTime() == file.getLastModefiedTime().getTime()) {
      long oldSize = 0L;
      long newSize = file.getSize();
      for (int i = 0; i < files.size(); i++) {
        if (files.get(i).getFileName() == file.getFileName() && files.get(i).getSize() != file.getSize()) {
          newSize = files.get(i).getSize();
          oldSize = file.getSize();
          break;
        }
      }
      // Get its information from the HDFS name node
      BlockLocation[] fileBlocks = fs.getFileBlockLocations(fileStatus, 0, newSize);
      // Create a split per block
      for (BlockLocation block : fileBlocks) {
        if (block.getOffset() + block.getLength() > oldSize) {
          if (block.getOffset() < newSize) {
            // Block intersects the appended delta -> create a split
            long startCut = (block.getOffset() > oldSize) ? 0L : oldSize - block.getOffset();
            long endCut = (block.getOffset() + block.getLength() < newSize)
                ? 0L : block.getOffset() + block.getLength() - newSize;
            long splitLength = block.getLength() - startCut - endCut;
            fileSplits.add(new FileSplit(filePath, block.getOffset() + startCut, splitLength, block.getHosts()));
            orderedExternalFiles.add(file);
          }
        }
      }
    }
  }
  fs.close();
  files.clear();
  files.addAll(orderedExternalFiles);
  return fileSplits.toArray(new FileSplit[fileSplits.size()]);
}
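The startCut/endCut clipping in the NoOp branch is easiest to follow with concrete numbers. The standalone sketch below replays just that arithmetic for a file assumed to have grown from 200 MB to 300 MB with a 128 MB block size; the sizes are illustrative assumptions, not values taken from the method above.

public class DeltaSplitSketch {
  public static void main(String[] args) {
    long mb = 1024L * 1024;
    long oldSize = 200 * mb, newSize = 300 * mb, blockSize = 128 * mb;
    // Blocks reported by HDFS for the grown file: [0,128), [128,256), [256,300) MB.
    long[][] blocks = { { 0, blockSize }, { blockSize, blockSize }, { 2 * blockSize, newSize - 2 * blockSize } };
    for (long[] b : blocks) {
      long offset = b[0], length = b[1];
      // Same predicate and clipping as the NoOp branch above.
      if (offset + length > oldSize && offset < newSize) {
        long startCut = (offset > oldSize) ? 0L : oldSize - offset;
        long endCut = (offset + length < newSize) ? 0L : offset + length - newSize;
        long splitLength = length - startCut - endCut;
        System.out.println("split at " + (offset + startCut) / mb + " MB, length " + splitLength / mb + " MB");
      }
    }
    // Prints: split at 200 MB, length 56 MB
    //         split at 256 MB, length 44 MB
    // i.e. only the appended region [200 MB, 300 MB) is covered by splits.
  }
}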
use of org.apache.hadoop.fs.BlockLocation in project hadoop by apache.
the class NativeAzureFileSystem method getFileBlockLocations.
/**
 * Return an array containing hostnames, offset and size of
 * portions of the given file. For WASB we'll just lie and give
 * fake hosts to make sure we get many splits in MR jobs.
 */
@Override
public BlockLocation[] getFileBlockLocations(FileStatus file, long start, long len) throws IOException {
  if (file == null) {
    return null;
  }
  if ((start < 0) || (len < 0)) {
    throw new IllegalArgumentException("Invalid start or len parameter");
  }
  if (file.getLen() < start) {
    return new BlockLocation[0];
  }
  final String blobLocationHost = getConf().get(AZURE_BLOCK_LOCATION_HOST_PROPERTY_NAME, AZURE_BLOCK_LOCATION_HOST_DEFAULT);
  final String[] name = { blobLocationHost };
  final String[] host = { blobLocationHost };
  long blockSize = file.getBlockSize();
  if (blockSize <= 0) {
    throw new IllegalArgumentException("The block size for the given file is not a positive number: " + blockSize);
  }
  int numberOfLocations = (int) (len / blockSize) + ((len % blockSize == 0) ? 0 : 1);
  BlockLocation[] locations = new BlockLocation[numberOfLocations];
  for (int i = 0; i < locations.length; i++) {
    long currentOffset = start + (i * blockSize);
    long currentLength = Math.min(blockSize, start + len - currentOffset);
    locations[i] = new BlockLocation(name, host, currentOffset, currentLength);
  }
  return locations;
}
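A quick way to see what the loop produces: the standalone sketch below replays the same arithmetic for an assumed 500 MB request starting at offset 0 with a 256 MB block size (the numbers are illustrative, not defaults of the driver).

public class WasbLocationSketch {
  public static void main(String[] args) {
    long mb = 1024L * 1024;
    long start = 0, len = 500 * mb, blockSize = 256 * mb;
    // Same location count and slicing as getFileBlockLocations above.
    int numberOfLocations = (int) (len / blockSize) + ((len % blockSize == 0) ? 0 : 1);
    System.out.println(numberOfLocations);           // 2
    for (int i = 0; i < numberOfLocations; i++) {
      long currentOffset = start + (i * blockSize);
      long currentLength = Math.min(blockSize, start + len - currentOffset);
      System.out.println(currentOffset / mb + " MB, " + currentLength / mb + " MB");
      // 0 MB, 256 MB
      // 256 MB, 244 MB  (the last fake block only covers the remaining bytes)
    }
  }
}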
use of org.apache.hadoop.fs.BlockLocation in project hadoop by apache.
the class TestNativeAzureFileSystemBlockLocations method getBlockLocationsOutput.
private static BlockLocation[] getBlockLocationsOutput(int fileSize, int blockSize, long start, long len, String blockLocationHost) throws Exception {
  Configuration conf = new Configuration();
  conf.set(NativeAzureFileSystem.AZURE_BLOCK_SIZE_PROPERTY_NAME, "" + blockSize);
  if (blockLocationHost != null) {
    conf.set(NativeAzureFileSystem.AZURE_BLOCK_LOCATION_HOST_PROPERTY_NAME, blockLocationHost);
  }
  AzureBlobStorageTestAccount testAccount = AzureBlobStorageTestAccount.createMock(conf);
  FileSystem fs = testAccount.getFileSystem();
  Path testFile = createTestFile(fs, fileSize);
  FileStatus stat = fs.getFileStatus(testFile);
  BlockLocation[] locations = fs.getFileBlockLocations(stat, start, len);
  testAccount.cleanup();
  return locations;
}
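A hedged sketch of how this helper might be exercised in a test; the sizes, expected counts, and host name below are assumptions for illustration rather than cases copied from TestNativeAzureFileSystemBlockLocations.

@Test
public void testNumberOfBlocksSketch() throws Exception {
  // A 21-byte file read with a 10-byte "block size" should be reported as 3 fake blocks.
  BlockLocation[] locations = getBlockLocationsOutput(21, 10, 0, 21, "myblobhost");
  assertEquals(3, locations.length);
  assertEquals("myblobhost", locations[0].getHosts()[0]);
  assertEquals(10, locations[0].getLength());
  assertEquals(1, locations[2].getLength()); // the tail block covers only the last byte
}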