use of org.apache.hadoop.fs.ChecksumException in project hadoop by apache.
the class TestBlockReaderLocalLegacy method testStablePositionAfterCorruptRead.
/**
* Test that, in the case of an error, the position and limit of a ByteBuffer
* are left unchanged. This is not mandated by ByteBufferReadable, but clients
* of this class might immediately issue a retry on failure, so it's polite.
*/
@Test
public void testStablePositionAfterCorruptRead() throws Exception {
final short REPL_FACTOR = 1;
final long FILE_LENGTH = 512L;
HdfsConfiguration conf = getConfiguration(null);
MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).numDataNodes(1).build();
cluster.waitActive();
FileSystem fs = cluster.getFileSystem();
Path path = new Path("/corrupted");
DFSTestUtil.createFile(fs, path, FILE_LENGTH, REPL_FACTOR, 12345L);
DFSTestUtil.waitReplication(fs, path, REPL_FACTOR);
ExtendedBlock block = DFSTestUtil.getFirstBlock(fs, path);
int blockFilesCorrupted = cluster.corruptBlockOnDataNodes(block);
assertEquals("All replicas not corrupted", REPL_FACTOR, blockFilesCorrupted);
FSDataInputStream dis = cluster.getFileSystem().open(path);
ByteBuffer buf = ByteBuffer.allocateDirect((int) FILE_LENGTH);
boolean sawException = false;
try {
dis.read(buf);
} catch (ChecksumException ex) {
sawException = true;
}
assertTrue(sawException);
assertEquals(0, buf.position());
assertEquals(buf.capacity(), buf.limit());
dis = cluster.getFileSystem().open(path);
buf.position(3);
buf.limit(25);
sawException = false;
try {
dis.read(buf);
} catch (ChecksumException ex) {
sawException = true;
}
assertTrue(sawException);
assertEquals(3, buf.position());
assertEquals(25, buf.limit());
cluster.shutdown();
}
use of org.apache.hadoop.fs.ChecksumException in project hadoop by apache.
the class TestEditLog method testEditChecksum.
@Test
public void testEditChecksum() throws Exception {
// start a cluster
Configuration conf = getConf();
MiniDFSCluster cluster = null;
FileSystem fileSys = null;
cluster = new MiniDFSCluster.Builder(conf).numDataNodes(NUM_DATA_NODES).build();
cluster.waitActive();
fileSys = cluster.getFileSystem();
final FSNamesystem namesystem = cluster.getNamesystem();
FSImage fsimage = namesystem.getFSImage();
final FSEditLog editLog = fsimage.getEditLog();
fileSys.mkdirs(new Path("/tmp"));
Iterator<StorageDirectory> iter = fsimage.getStorage().dirIterator(NameNodeDirType.EDITS);
LinkedList<StorageDirectory> sds = new LinkedList<StorageDirectory>();
while (iter.hasNext()) {
sds.add(iter.next());
}
editLog.close();
cluster.shutdown();
for (StorageDirectory sd : sds) {
File editFile = NNStorage.getFinalizedEditsFile(sd, 1, 3);
assertTrue(editFile.exists());
long fileLen = editFile.length();
LOG.debug("Corrupting Log File: " + editFile + " len: " + fileLen);
RandomAccessFile rwf = new RandomAccessFile(editFile, "rw");
// seek to checksum bytes
rwf.seek(fileLen - 4);
int b = rwf.readInt();
rwf.seek(fileLen - 4);
rwf.writeInt(b + 1);
rwf.close();
}
try {
cluster = new MiniDFSCluster.Builder(conf).numDataNodes(NUM_DATA_NODES).format(false).build();
fail("should not be able to start");
} catch (IOException e) {
// expected
assertNotNull("Cause of exception should be ChecksumException", e.getCause());
assertEquals("Cause of exception should be ChecksumException", ChecksumException.class, e.getCause().getClass());
}
}
use of org.apache.hadoop.fs.ChecksumException in project hadoop by apache.
the class TestFsck method testUnderMinReplicatedBlock.
@Test
public void testUnderMinReplicatedBlock() throws Exception {
conf.setLong(DFSConfigKeys.DFS_BLOCKREPORT_INTERVAL_MSEC_KEY, 1000);
// Set short retry timeouts so this test runs faster
conf.setInt(HdfsClientConfigKeys.Retry.WINDOW_BASE_KEY, 10);
// Set minReplication to 2
short minReplication = 2;
conf.setInt(DFSConfigKeys.DFS_NAMENODE_REPLICATION_MIN_KEY, minReplication);
FileSystem fs = null;
DFSClient dfsClient = null;
LocatedBlocks blocks = null;
int replicaCount = 0;
Random random = new Random();
String outStr = null;
short factor = 1;
cluster = new MiniDFSCluster.Builder(conf).numDataNodes(2).build();
cluster.waitActive();
fs = cluster.getFileSystem();
Path file1 = new Path("/testUnderMinReplicatedBlock");
DFSTestUtil.createFile(fs, file1, 1024, minReplication, 0);
// Wait until file replication has completed
DFSTestUtil.waitReplication(fs, file1, minReplication);
ExtendedBlock block = DFSTestUtil.getFirstBlock(fs, file1);
// Make sure filesystem is in healthy state
outStr = runFsck(conf, 0, true, "/");
System.out.println(outStr);
assertTrue(outStr.contains(NamenodeFsck.HEALTHY_STATUS));
// corrupt the first replica
File blockFile = cluster.getBlockFile(0, block);
if (blockFile != null && blockFile.exists()) {
RandomAccessFile raFile = new RandomAccessFile(blockFile, "rw");
FileChannel channel = raFile.getChannel();
String badString = "BADBAD";
int rand = random.nextInt((int) channel.size() / 2);
raFile.seek(rand);
raFile.write(badString.getBytes());
raFile.close();
}
dfsClient = new DFSClient(new InetSocketAddress("localhost", cluster.getNameNodePort()), conf);
blocks = dfsClient.getNamenode().getBlockLocations(file1.toString(), 0, Long.MAX_VALUE);
replicaCount = blocks.get(0).getLocations().length;
while (replicaCount != factor) {
try {
Thread.sleep(100);
// Read the file to trigger reportBadBlocks
try {
IOUtils.copyBytes(fs.open(file1), new IOUtils.NullOutputStream(), conf, true);
} catch (IOException ie) {
assertTrue(ie instanceof ChecksumException);
}
System.out.println("sleep in try: replicaCount=" + replicaCount + " factor=" + factor);
} catch (InterruptedException ignore) {
}
blocks = dfsClient.getNamenode().getBlockLocations(file1.toString(), 0, Long.MAX_VALUE);
replicaCount = blocks.get(0).getLocations().length;
}
// Check if fsck reports the same
outStr = runFsck(conf, 0, true, "/");
System.out.println(outStr);
assertTrue(outStr.contains(NamenodeFsck.HEALTHY_STATUS));
assertTrue(outStr.contains("UNDER MIN REPL'D BLOCKS:\t1 (100.0 %)"));
assertTrue(outStr.contains("dfs.namenode.replication.min:\t2"));
}
use of org.apache.hadoop.fs.ChecksumException in project hadoop by apache.
the class DFSInputStream method readWithStrategy.
protected synchronized int readWithStrategy(ReaderStrategy strategy) throws IOException {
dfsClient.checkOpen();
if (closed.get()) {
throw new IOException("Stream closed");
}
int len = strategy.getTargetLength();
CorruptedBlocks corruptedBlocks = new CorruptedBlocks();
failures = 0;
if (pos < getFileLength()) {
int retries = 2;
while (retries > 0) {
try {
// error on the same block. See HDFS-3067
if (pos > blockEnd || currentNode == null) {
currentNode = blockSeekTo(pos);
}
int realLen = (int) Math.min(len, (blockEnd - pos + 1L));
synchronized (infoLock) {
if (locatedBlocks.isLastBlockComplete()) {
realLen = (int) Math.min(realLen, locatedBlocks.getFileLength() - pos);
}
}
int result = readBuffer(strategy, realLen, corruptedBlocks);
if (result >= 0) {
pos += result;
} else {
// got a EOS from reader though we expect more data on it.
throw new IOException("Unexpected EOS from the reader");
}
return result;
} catch (ChecksumException ce) {
throw ce;
} catch (IOException e) {
checkInterrupted(e);
if (retries == 1) {
DFSClient.LOG.warn("DFS Read", e);
}
blockEnd = -1;
if (currentNode != null) {
addToDeadNodes(currentNode);
}
if (--retries == 0) {
throw e;
}
} finally {
// Check if need to report block replicas corruption either read
// was successful or ChecksumException occured.
reportCheckSumFailure(corruptedBlocks, currentLocatedBlock.getLocations().length, false);
}
}
}
return -1;
}
use of org.apache.hadoop.fs.ChecksumException in project hadoop by apache.
the class DFSInputStream method actualGetFromOneDataNode.
/**
* Read data from one DataNode.
*
* @param datanode the datanode from which to read data
* @param block the located block containing the requested data
* @param startInBlk the startInBlk offset of the block
* @param endInBlk the endInBlk offset of the block
* @param buf the given byte buffer into which the data is read
* @param corruptedBlocks map recording list of datanodes with corrupted
* block replica
*/
void actualGetFromOneDataNode(final DNAddrPair datanode, LocatedBlock block, final long startInBlk, final long endInBlk, ByteBuffer buf, CorruptedBlocks corruptedBlocks) throws IOException {
DFSClientFaultInjector.get().startFetchFromDatanode();
// only need to get a new access token once
int refetchToken = 1;
// only need to get a new encryption key once
int refetchEncryptionKey = 1;
final int len = (int) (endInBlk - startInBlk + 1);
while (true) {
// cached block locations may have been updated by chooseDataNode()
// or fetchBlockAt(). Always get the latest list of locations at the
// start of the loop.
block = refreshLocatedBlock(block);
BlockReader reader = null;
try {
DFSClientFaultInjector.get().fetchFromDatanodeException();
reader = getBlockReader(block, startInBlk, len, datanode.addr, datanode.storageType, datanode.info);
//Behave exactly as the readAll() call
ByteBuffer tmp = buf.duplicate();
tmp.limit(tmp.position() + len);
tmp = tmp.slice();
int nread = 0;
int ret;
while (true) {
ret = reader.read(tmp);
if (ret <= 0) {
break;
}
nread += ret;
}
buf.position(buf.position() + nread);
IOUtilsClient.updateReadStatistics(readStatistics, nread, reader);
dfsClient.updateFileSystemReadStats(reader.getNetworkDistance(), nread);
if (nread != len) {
throw new IOException("truncated return from reader.read(): " + "excpected " + len + ", got " + nread);
}
DFSClientFaultInjector.get().readFromDatanodeDelay();
return;
} catch (ChecksumException e) {
String msg = "fetchBlockByteRange(). Got a checksum exception for " + src + " at " + block.getBlock() + ":" + e.getPos() + " from " + datanode.info;
DFSClient.LOG.warn(msg);
// we want to remember what we have tried
corruptedBlocks.addCorruptedBlock(block.getBlock(), datanode.info);
addToDeadNodes(datanode.info);
throw new IOException(msg);
} catch (IOException e) {
checkInterrupted(e);
if (e instanceof InvalidEncryptionKeyException && refetchEncryptionKey > 0) {
DFSClient.LOG.info("Will fetch a new encryption key and retry, " + "encryption key was invalid when connecting to " + datanode.addr + " : " + e);
// The encryption key used is invalid.
refetchEncryptionKey--;
dfsClient.clearDataEncryptionKey();
} else if (refetchToken > 0 && tokenRefetchNeeded(e, datanode.addr)) {
refetchToken--;
try {
fetchBlockAt(block.getStartOffset());
} catch (IOException fbae) {
// ignore IOE, since we can retry it later in a loop
}
} else {
String msg = "Failed to connect to " + datanode.addr + " for file " + src + " for block " + block.getBlock() + ":" + e;
DFSClient.LOG.warn("Connection failure: " + msg, e);
addToDeadNodes(datanode.info);
throw new IOException(msg);
}
} finally {
if (reader != null) {
reader.close();
}
}
}
}
Aggregations