use of org.apache.hadoop.fs.FSDataOutputStream in project hadoop by apache.
the class TestPipelinesFailover method doTestWriteOverFailoverWithDnFail.
private void doTestWriteOverFailoverWithDnFail(TestScenario scenario) throws Exception {
Configuration conf = new Configuration();
conf.setInt(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, BLOCK_SIZE);
FSDataOutputStream stm = null;
MiniDFSCluster cluster = newMiniCluster(conf, 5);
try {
cluster.waitActive();
cluster.transitionToActive(0);
Thread.sleep(500);
LOG.info("Starting with NN 0 active");
FileSystem fs = HATestUtil.configureFailoverFs(cluster, conf);
stm = fs.create(TEST_PATH);
// write a block and a half
AppendTestUtil.write(stm, 0, BLOCK_AND_A_HALF);
// Make sure all the blocks are written before failover
stm.hflush();
int nextActive = failover(cluster, scenario);
assertTrue(fs.exists(TEST_PATH));
cluster.stopDataNode(0);
// write another block and a half
AppendTestUtil.write(stm, BLOCK_AND_A_HALF, BLOCK_AND_A_HALF);
stm.hflush();
LOG.info("Failing back from NN " + nextActive + " to NN 0");
cluster.transitionToStandby(nextActive);
cluster.transitionToActive(0);
cluster.stopDataNode(1);
AppendTestUtil.write(stm, BLOCK_AND_A_HALF * 2, BLOCK_AND_A_HALF);
stm.hflush();
stm.close();
stm = null;
AppendTestUtil.check(fs, TEST_PATH, BLOCK_AND_A_HALF * 3);
} finally {
IOUtils.closeStream(stm);
cluster.shutdown();
}
}
use of org.apache.hadoop.fs.FSDataOutputStream in project hadoop by apache.
the class TestPipelinesFailover method doWriteOverFailoverTest.
private void doWriteOverFailoverTest(TestScenario scenario, MethodToTestIdempotence methodToTest) throws Exception {
Configuration conf = new Configuration();
conf.setInt(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, BLOCK_SIZE);
// Don't check low redundancy periodically.
conf.setInt(DFSConfigKeys.DFS_NAMENODE_REDUNDANCY_INTERVAL_SECONDS_KEY, 1000);
FSDataOutputStream stm = null;
MiniDFSCluster cluster = newMiniCluster(conf, 3);
try {
int sizeWritten = 0;
cluster.waitActive();
cluster.transitionToActive(0);
Thread.sleep(500);
LOG.info("Starting with NN 0 active");
FileSystem fs = HATestUtil.configureFailoverFs(cluster, conf);
stm = fs.create(TEST_PATH);
// write a block and a half
AppendTestUtil.write(stm, 0, BLOCK_AND_A_HALF);
sizeWritten += BLOCK_AND_A_HALF;
// Make sure all of the blocks are written out before failover.
stm.hflush();
LOG.info("Failing over to another NN");
int activeIndex = failover(cluster, scenario);
// NOTE: explicitly do *not* make any further metadata calls
// to the NN here. The next IPC call should be to allocate the next
// block. Any other call would notice the failover and not test
// idempotence of the operation (HDFS-3031)
FSNamesystem ns1 = cluster.getNameNode(activeIndex).getNamesystem();
BlockManagerTestUtil.updateState(ns1.getBlockManager());
assertEquals(0, ns1.getPendingReplicationBlocks());
assertEquals(0, ns1.getCorruptReplicaBlocks());
assertEquals(0, ns1.getMissingBlocksCount());
// completeFile() if we're testing idempotence of that operation.
if (methodToTest == MethodToTestIdempotence.ALLOCATE_BLOCK) {
// write another block and a half
AppendTestUtil.write(stm, sizeWritten, BLOCK_AND_A_HALF);
sizeWritten += BLOCK_AND_A_HALF;
}
stm.close();
stm = null;
AppendTestUtil.check(fs, TEST_PATH, sizeWritten);
} finally {
IOUtils.closeStream(stm);
cluster.shutdown();
}
}
use of org.apache.hadoop.fs.FSDataOutputStream in project hadoop by apache.
the class TestPipelinesFailover method testFailoverRightBeforeCommitSynchronization.
/**
* Test the scenario where the NN fails over after issuing a block
* synchronization request, but before it is committed. The
* DN running the recovery should then fail to commit the synchronization
* and a later retry will succeed.
*/
@Test(timeout = 30000)
public void testFailoverRightBeforeCommitSynchronization() throws Exception {
final Configuration conf = new Configuration();
// Disable permissions so that another user can recover the lease.
conf.setBoolean(DFSConfigKeys.DFS_PERMISSIONS_ENABLED_KEY, false);
conf.setInt(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, BLOCK_SIZE);
FSDataOutputStream stm = null;
final MiniDFSCluster cluster = newMiniCluster(conf, 3);
try {
cluster.waitActive();
cluster.transitionToActive(0);
Thread.sleep(500);
LOG.info("Starting with NN 0 active");
FileSystem fs = HATestUtil.configureFailoverFs(cluster, conf);
stm = fs.create(TEST_PATH);
// write a half block
AppendTestUtil.write(stm, 0, BLOCK_SIZE / 2);
stm.hflush();
// Look into the block manager on the active node for the block
// under construction.
NameNode nn0 = cluster.getNameNode(0);
ExtendedBlock blk = DFSTestUtil.getFirstBlock(fs, TEST_PATH);
DatanodeDescriptor expectedPrimary = DFSTestUtil.getExpectedPrimaryNode(nn0, blk);
LOG.info("Expecting block recovery to be triggered on DN " + expectedPrimary);
// Find the corresponding DN daemon, and spy on its connection to the
// active.
DataNode primaryDN = cluster.getDataNode(expectedPrimary.getIpcPort());
DatanodeProtocolClientSideTranslatorPB nnSpy = InternalDataNodeTestUtils.spyOnBposToNN(primaryDN, nn0);
// Delay the commitBlockSynchronization call
DelayAnswer delayer = new DelayAnswer(LOG);
Mockito.doAnswer(delayer).when(nnSpy).commitBlockSynchronization(Mockito.eq(blk), // new genstamp
Mockito.anyInt(), // new length
Mockito.anyLong(), // close file
Mockito.eq(true), // delete block
Mockito.eq(false), // new targets
(DatanodeID[]) Mockito.anyObject(), // new target storages
(String[]) Mockito.anyObject());
DistributedFileSystem fsOtherUser = createFsAsOtherUser(cluster, conf);
assertFalse(fsOtherUser.recoverLease(TEST_PATH));
LOG.info("Waiting for commitBlockSynchronization call from primary");
delayer.waitForCall();
LOG.info("Failing over to NN 1");
cluster.transitionToStandby(0);
cluster.transitionToActive(1);
// Let the commitBlockSynchronization call go through, and check that
// it failed with the correct exception.
delayer.proceed();
delayer.waitForResult();
Throwable t = delayer.getThrown();
if (t == null) {
fail("commitBlockSynchronization call did not fail on standby");
}
GenericTestUtils.assertExceptionContains("Operation category WRITE is not supported", t);
// Now, if we try again to recover the block, it should succeed on the new
// active.
loopRecoverLease(fsOtherUser, TEST_PATH);
AppendTestUtil.check(fs, TEST_PATH, BLOCK_SIZE / 2);
} finally {
IOUtils.closeStream(stm);
cluster.shutdown();
}
}
use of org.apache.hadoop.fs.FSDataOutputStream in project hadoop by apache.
the class TestDNFencing method testRBWReportArrivesAfterEdits.
/**
* Another regression test for HDFS-2742. This tests the following sequence:
* - DN does a block report while file is open. This BR contains
* the block in RBW state.
* - The block report is delayed in reaching the standby.
* - The file is closed.
* - The standby processes the OP_ADD and OP_CLOSE operations before
* the RBW block report arrives.
* - The standby should not mark the block as corrupt.
*/
@Test
public void testRBWReportArrivesAfterEdits() throws Exception {
final CountDownLatch brFinished = new CountDownLatch(1);
DelayAnswer delayer = new GenericTestUtils.DelayAnswer(LOG) {
@Override
protected Object passThrough(InvocationOnMock invocation) throws Throwable {
try {
return super.passThrough(invocation);
} finally {
// inform the test that our block report went through.
brFinished.countDown();
}
}
};
FSDataOutputStream out = fs.create(TEST_FILE_PATH);
try {
AppendTestUtil.write(out, 0, 10);
out.hflush();
DataNode dn = cluster.getDataNodes().get(0);
DatanodeProtocolClientSideTranslatorPB spy = InternalDataNodeTestUtils.spyOnBposToNN(dn, nn2);
Mockito.doAnswer(delayer).when(spy).blockReport(Mockito.<DatanodeRegistration>anyObject(), Mockito.anyString(), Mockito.<StorageBlockReport[]>anyObject(), Mockito.<BlockReportContext>anyObject());
dn.scheduleAllBlockReport(0);
delayer.waitForCall();
} finally {
IOUtils.closeStream(out);
}
cluster.transitionToStandby(0);
cluster.transitionToActive(1);
delayer.proceed();
brFinished.await();
// Verify that no replicas are marked corrupt, and that the
// file is readable from the failed-over standby.
BlockManagerTestUtil.updateState(nn1.getNamesystem().getBlockManager());
BlockManagerTestUtil.updateState(nn2.getNamesystem().getBlockManager());
assertEquals(0, nn1.getNamesystem().getCorruptReplicaBlocks());
assertEquals(0, nn2.getNamesystem().getCorruptReplicaBlocks());
DFSTestUtil.readFile(fs, TEST_FILE_PATH);
}
use of org.apache.hadoop.fs.FSDataOutputStream in project hadoop by apache.
the class TestHASafeMode method testAppendWhileInSafeMode.
/**
* Tests that the standby node properly tracks the number of total
* and safe blocks while it is in safe mode. Since safe-mode only
* counts completed blocks, append needs to decrement the total
* number of blocks and then re-increment when the file is closed
* again.
*/
@Test
public void testAppendWhileInSafeMode() throws Exception {
banner("Starting with NN0 active and NN1 standby, creating some blocks");
// Make 4.5 blocks so that append() will re-open an existing block
// instead of just adding a new one
DFSTestUtil.createFile(fs, new Path("/test"), 4 * BLOCK_SIZE + BLOCK_SIZE / 2, (short) 3, 1L);
// Roll edit log so that, when the SBN restarts, it will load
// the namespace during startup.
nn0.getRpcServer().rollEditLog();
banner("Restarting standby");
restartStandby();
// It will initially have all of the blocks necessary.
assertSafeMode(nn1, 5, 5, 3, 0);
// Append to a block while SBN is in safe mode. This should
// not affect safemode initially, since the DN message
// will get queued.
FSDataOutputStream stm = fs.append(new Path("/test"));
try {
assertSafeMode(nn1, 5, 5, 3, 0);
// if we roll edits now, the SBN should see that it's under construction
// and change its total count and safe count down by one, since UC
// blocks are not counted by safe mode.
HATestUtil.waitForStandbyToCatchUp(nn0, nn1);
assertSafeMode(nn1, 4, 4, 3, 0);
} finally {
IOUtils.closeStream(stm);
}
// Delete those blocks while the SBN is in safe mode.
// This will not ACK the deletions to the SBN, so it won't
// notice until we roll the edit log.
banner("Removing the blocks without rolling the edit log");
fs.delete(new Path("/test"), true);
BlockManagerTestUtil.computeAllPendingWork(nn0.getNamesystem().getBlockManager());
banner("Triggering deletions on DNs and Deletion Reports");
cluster.triggerHeartbeats();
HATestUtil.waitForDNDeletions(cluster);
cluster.triggerDeletionReports();
assertSafeMode(nn1, 4, 4, 3, 0);
// When we roll the edit log, the deletions will go through.
banner("Waiting for standby to catch up to active namespace");
HATestUtil.waitForStandbyToCatchUp(nn0, nn1);
assertSafeMode(nn1, 0, 0, 3, 0);
}
Aggregations