
Example 1 with Holder

Use of org.apache.hadoop.hdfs.util.Holder in the Apache Hadoop project: class TestFileAppendRestart, method testAppendRestart.

/**
   * Regression test for HDFS-2991. Creates and appends to files
   * where blocks start/end on block boundaries.
   */
@Test
public void testAppendRestart() throws Exception {
    final Configuration conf = new HdfsConfiguration();
    // Turn off persistent IPC, so that the DFSClient can survive NN restart
    conf.setInt(CommonConfigurationKeysPublic.IPC_CLIENT_CONNECTION_MAXIDLETIME_KEY, 0);
    MiniDFSCluster cluster = null;
    FSDataOutputStream stream = null;
    try {
        cluster = new MiniDFSCluster.Builder(conf).numDataNodes(1).build();
        FileSystem fs = cluster.getFileSystem();
        File editLog = new File(FSImageTestUtil.getNameNodeCurrentDirs(cluster, 0).get(0), NNStorage.getInProgressEditsFileName(1));
        EnumMap<FSEditLogOpCodes, Holder<Integer>> counts;
        Path p1 = new Path("/block-boundaries");
        writeAndAppend(fs, p1, BLOCK_SIZE, BLOCK_SIZE);
        counts = FSImageTestUtil.countEditLogOpTypes(editLog);
        // OP_ADD to create file
        // OP_ADD_BLOCK for first block
        // OP_CLOSE to close file
        // OP_APPEND to reopen file
        // OP_ADD_BLOCK for second block
        // OP_CLOSE to close file
        assertEquals(1, (int) counts.get(FSEditLogOpCodes.OP_ADD).held);
        assertEquals(1, (int) counts.get(FSEditLogOpCodes.OP_APPEND).held);
        assertEquals(2, (int) counts.get(FSEditLogOpCodes.OP_ADD_BLOCK).held);
        assertEquals(2, (int) counts.get(FSEditLogOpCodes.OP_CLOSE).held);
        Path p2 = new Path("/not-block-boundaries");
        writeAndAppend(fs, p2, BLOCK_SIZE / 2, BLOCK_SIZE);
        counts = FSImageTestUtil.countEditLogOpTypes(editLog);
        // OP_ADD to create file
        // OP_ADD_BLOCK for first block
        // OP_CLOSE to close file
        // OP_APPEND to re-establish the lease
        // OP_UPDATE_BLOCKS from the updatePipeline call (increments genstamp of last block)
        // OP_ADD_BLOCK at the start of the second block
        // OP_CLOSE to close file
        // Total for this file: 1 OP_ADD, 1 OP_APPEND, 1 OP_UPDATE_BLOCKS,
        // 2 OP_ADD_BLOCKs, and 2 OP_CLOSEs, in addition to the ones above
        assertEquals(2, (int) counts.get(FSEditLogOpCodes.OP_ADD).held);
        assertEquals(2, (int) counts.get(FSEditLogOpCodes.OP_APPEND).held);
        assertEquals(1, (int) counts.get(FSEditLogOpCodes.OP_UPDATE_BLOCKS).held);
        assertEquals(2 + 2, (int) counts.get(FSEditLogOpCodes.OP_ADD_BLOCK).held);
        assertEquals(2 + 2, (int) counts.get(FSEditLogOpCodes.OP_CLOSE).held);
        cluster.restartNameNode();
        AppendTestUtil.check(fs, p1, 2 * BLOCK_SIZE);
        AppendTestUtil.check(fs, p2, 3 * BLOCK_SIZE / 2);
    } finally {
        IOUtils.closeStream(stream);
        if (cluster != null) {
            cluster.shutdown();
        }
    }
}
Also used: Path (org.apache.hadoop.fs.Path), FSEditLogOpCodes (org.apache.hadoop.hdfs.server.namenode.FSEditLogOpCodes), Configuration (org.apache.hadoop.conf.Configuration), FileSystem (org.apache.hadoop.fs.FileSystem), Holder (org.apache.hadoop.hdfs.util.Holder), FSDataOutputStream (org.apache.hadoop.fs.FSDataOutputStream), File (java.io.File), Test (org.junit.Test)
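
In this test, Holder<Integer> is simply a mutable box for each per-opcode count returned by FSImageTestUtil.countEditLogOpTypes, and the count is read through its public held field. The Holder class itself is not shown on this page; judging from how it is used in these examples, it is a minimal generic wrapper, roughly as sketched below (an approximation inferred from the usages here, not a copy of the Hadoop source):

// Minimal sketch of a Holder-style wrapper, inferred from the usages on this
// page: it is constructed with an initial value and read or mutated through
// the public "held" field. Approximation only, not the actual
// org.apache.hadoop.hdfs.util.Holder source.
public class Holder<T> {

    public T held;

    public Holder(final T held) {
        this.held = held;
    }

    @Override
    public String toString() {
        return String.valueOf(held);
    }
}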

Example 2 with Holder

Use of org.apache.hadoop.hdfs.util.Holder in the Apache Hadoop project: class FSEditLogLoader, method loadEditRecords.

long loadEditRecords(EditLogInputStream in, boolean closeOnExit, long expectedStartingTxId, StartupOption startOpt, MetaRecoveryContext recovery) throws IOException {
    FSDirectory fsDir = fsNamesys.dir;
    EnumMap<FSEditLogOpCodes, Holder<Integer>> opCounts = new EnumMap<FSEditLogOpCodes, Holder<Integer>>(FSEditLogOpCodes.class);
    if (LOG.isTraceEnabled()) {
        LOG.trace("Acquiring write lock to replay edit log");
    }
    fsNamesys.writeLock();
    fsDir.writeLock();
    long[] recentOpcodeOffsets = new long[4];
    Arrays.fill(recentOpcodeOffsets, -1);
    long expectedTxId = expectedStartingTxId;
    long numEdits = 0;
    long lastTxId = in.getLastTxId();
    long numTxns = (lastTxId - expectedStartingTxId) + 1;
    StartupProgress prog = NameNode.getStartupProgress();
    Step step = createStartupProgressStep(in);
    prog.setTotal(Phase.LOADING_EDITS, step, numTxns);
    Counter counter = prog.getCounter(Phase.LOADING_EDITS, step);
    long lastLogTime = monotonicNow();
    long lastInodeId = fsNamesys.dir.getLastInodeId();
    try {
        while (true) {
            try {
                FSEditLogOp op;
                try {
                    op = in.readOp();
                    if (op == null) {
                        break;
                    }
                } catch (Throwable e) {
                    // Handle a problem with our input
                    check203UpgradeFailure(in.getVersion(true), e);
                    String errorMessage = formatEditLogReplayError(in, recentOpcodeOffsets, expectedTxId);
                    FSImage.LOG.error(errorMessage, e);
                    if (recovery == null) {
                        // We will only try to skip over problematic opcodes when in
                        // recovery mode.
                        throw new EditLogInputException(errorMessage, e, numEdits);
                    }
                    MetaRecoveryContext.editLogLoaderPrompt("We failed to read txId " + expectedTxId, recovery, "skipping the bad section in the log");
                    in.resync();
                    continue;
                }
                recentOpcodeOffsets[(int) (numEdits % recentOpcodeOffsets.length)] = in.getPosition();
                if (op.hasTransactionId()) {
                    if (op.getTransactionId() > expectedTxId) {
                        MetaRecoveryContext.editLogLoaderPrompt("There appears " + "to be a gap in the edit log.  We expected txid " + expectedTxId + ", but got txid " + op.getTransactionId() + ".", recovery, "ignoring missing " + " transaction IDs");
                    } else if (op.getTransactionId() < expectedTxId) {
                        MetaRecoveryContext.editLogLoaderPrompt("There appears " + "to be an out-of-order edit in the edit log.  We " + "expected txid " + expectedTxId + ", but got txid " + op.getTransactionId() + ".", recovery, "skipping the out-of-order edit");
                        continue;
                    }
                }
                try {
                    if (LOG.isTraceEnabled()) {
                        LOG.trace("op=" + op + ", startOpt=" + startOpt + ", numEdits=" + numEdits + ", totalEdits=" + totalEdits);
                    }
                    long inodeId = applyEditLogOp(op, fsDir, startOpt, in.getVersion(true), lastInodeId);
                    if (lastInodeId < inodeId) {
                        lastInodeId = inodeId;
                    }
                } catch (RollingUpgradeOp.RollbackException e) {
                    throw e;
                } catch (Throwable e) {
                    LOG.error("Encountered exception on operation " + op, e);
                    if (recovery == null) {
                        throw e instanceof IOException ? (IOException) e : new IOException(e);
                    }
                    MetaRecoveryContext.editLogLoaderPrompt("Failed to " + "apply edit log operation " + op + ": error " + e.getMessage(), recovery, "applying edits");
                }
                // Now that the operation has been successfully decoded and
                // applied, update our bookkeeping.
                incrOpCount(op.opCode, opCounts, step, counter);
                if (op.hasTransactionId()) {
                    lastAppliedTxId = op.getTransactionId();
                    expectedTxId = lastAppliedTxId + 1;
                } else {
                    expectedTxId = lastAppliedTxId = expectedStartingTxId;
                }
                // log progress
                if (op.hasTransactionId()) {
                    long now = monotonicNow();
                    if (now - lastLogTime > REPLAY_TRANSACTION_LOG_INTERVAL) {
                        long deltaTxId = lastAppliedTxId - expectedStartingTxId + 1;
                        int percent = Math.round((float) deltaTxId / numTxns * 100);
                        LOG.info("replaying edit log: " + deltaTxId + "/" + numTxns + " transactions completed. (" + percent + "%)");
                        lastLogTime = now;
                    }
                }
                numEdits++;
                totalEdits++;
            } catch (RollingUpgradeOp.RollbackException e) {
                LOG.info("Stopped at OP_START_ROLLING_UPGRADE for rollback.");
                break;
            } catch (MetaRecoveryContext.RequestStopException e) {
                MetaRecoveryContext.LOG.warn("Stopped reading edit log at " + in.getPosition() + "/" + in.length());
                break;
            }
        }
    } finally {
        fsNamesys.dir.resetLastInodeId(lastInodeId);
        if (closeOnExit) {
            in.close();
        }
        fsDir.writeUnlock();
        fsNamesys.writeUnlock("loadEditRecords");
        if (LOG.isTraceEnabled()) {
            LOG.trace("replaying edit log finished");
        }
        if (FSImage.LOG.isDebugEnabled()) {
            dumpOpCounts(opCounts);
        }
    }
    return numEdits;
}
Also used: Holder (org.apache.hadoop.hdfs.util.Holder), Step (org.apache.hadoop.hdfs.server.namenode.startupprogress.Step), IOException (java.io.IOException), Counter (org.apache.hadoop.hdfs.server.namenode.startupprogress.StartupProgress.Counter), RollingUpgradeOp (org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.RollingUpgradeOp), EnumMap (java.util.EnumMap), StartupProgress (org.apache.hadoop.hdfs.server.namenode.startupprogress.StartupProgress)
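
The loader tallies opcodes through incrOpCount, whose body is not shown on this page. Using Holder<Integer> as the EnumMap value means the count can be bumped in place, so each opcode's entry only has to be created once instead of re-inserting a new boxed Integer on every transaction. A hedged sketch of what such an increment helper could look like (illustrative only; the real incrOpCount also updates the startup-progress Counter passed to it):

// Illustrative increment over an EnumMap<FSEditLogOpCodes, Holder<Integer>>.
// Sketch only: the actual FSEditLogLoader.incrOpCount also records startup
// progress through its Step and Counter arguments.
private static void incrOpCountSketch(FSEditLogOpCodes opCode,
        EnumMap<FSEditLogOpCodes, Holder<Integer>> opCounts) {
    Holder<Integer> holder = opCounts.get(opCode);
    if (holder == null) {
        // First occurrence of this opcode: create its holder with a count of 1.
        opCounts.put(opCode, new Holder<Integer>(1));
    } else {
        // Mutate the existing holder in place instead of replacing the entry.
        holder.held++;
    }
}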

Example 3 with Holder

Use of org.apache.hadoop.hdfs.util.Holder in the Apache Hadoop project: class TestQJMWithFaults, method testRandomized.

/**
   * Test case in which three JournalNodes randomly flip flop between
   * up and down states every time they get an RPC.
   * 
   * The writer keeps track of the latest ACKed edit, and on every
   * recovery operation, ensures that it recovers at least to that
   * point or higher. Since at any given point, a majority of JNs
   * may be injecting faults, any writer operation is allowed to fail,
   * so long as the exception message indicates it failed due to injected
   * faults.
   * 
   * Given a random seed, the test should be entirely deterministic.
   */
@Test
public void testRandomized() throws Exception {
    long seed;
    Long userSpecifiedSeed = Long.getLong(RAND_SEED_PROPERTY);
    if (userSpecifiedSeed != null) {
        LOG.info("Using seed specified in system property");
        seed = userSpecifiedSeed;
        // If the user specifies a seed, then we should gather all the
        // IPC trace information so that debugging is easier. This makes
        // the test run about 25% slower otherwise.
        GenericTestUtils.setLogLevel(ProtobufRpcEngine.LOG, Level.ALL);
    } else {
        seed = new Random().nextLong();
    }
    LOG.info("Random seed: " + seed);
    Random r = new Random(seed);
    MiniJournalCluster cluster = new MiniJournalCluster.Builder(conf).build();
    cluster.waitActive();
    // Format the cluster using a non-faulty QJM.
    QuorumJournalManager qjmForInitialFormat = createInjectableQJM(cluster);
    qjmForInitialFormat.format(FAKE_NSINFO);
    qjmForInitialFormat.close();
    try {
        long txid = 0;
        long lastAcked = 0;
        for (int i = 0; i < NUM_WRITER_ITERS; i++) {
            LOG.info("Starting writer " + i + "\n-------------------");
            QuorumJournalManager qjm = createRandomFaultyQJM(cluster, r);
            try {
                long recovered;
                try {
                    recovered = QJMTestUtil.recoverAndReturnLastTxn(qjm);
                } catch (Throwable t) {
                    LOG.info("Failed recovery", t);
                    checkException(t);
                    continue;
                }
                assertTrue("Recovered only up to txnid " + recovered + " but had gotten an ack for " + lastAcked, recovered >= lastAcked);
                txid = recovered + 1;
                // Periodically purge old data on disk so it's easier to look
                // at failure cases.
                if (txid > 100 && i % 10 == 1) {
                    qjm.purgeLogsOlderThan(txid - 100);
                }
                Holder<Throwable> thrown = new Holder<Throwable>(null);
                for (int j = 0; j < SEGMENTS_PER_WRITER; j++) {
                    lastAcked = writeSegmentUntilCrash(cluster, qjm, txid, 4, thrown);
                    if (thrown.held != null) {
                        LOG.info("Failed write", thrown.held);
                        checkException(thrown.held);
                        break;
                    }
                    txid += 4;
                }
            } finally {
                qjm.close();
            }
        }
    } finally {
        cluster.shutdown();
    }
}
Also used: Random (java.util.Random), MiniJournalCluster (org.apache.hadoop.hdfs.qjournal.MiniJournalCluster), Holder (org.apache.hadoop.hdfs.util.Holder), Test (org.junit.Test)
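
Here Holder<Throwable> serves as an out parameter: writeSegmentUntilCrash returns the last ACKed transaction id as its normal result and hands any injected-fault exception back through thrown.held. The helper itself is not reproduced on this page; the sketch below shows the general pattern under that assumption (names and loop body are illustrative, not the actual test code):

// Illustrative out-parameter pattern with Holder<Throwable>: the method
// returns its primary result and reports a failure, if any, through the
// holder. Sketch only; not the actual writeSegmentUntilCrash implementation.
static long doWorkReportingFailure(long startTxId, int numTxns,
        Holder<Throwable> thrown) {
    long lastAcked = startTxId - 1;
    try {
        for (int i = 0; i < numTxns; i++) {
            // ... perform one unit of work that may throw ...
            lastAcked = startTxId + i;
        }
    } catch (Throwable t) {
        // Hand the failure back to the caller without losing the partial result.
        thrown.held = t;
    }
    return lastAcked;
}

Because a Java method can return only a single value, the mutable holder lets the helper report both how far it got and the exception that stopped it, which is how the loop in the test distinguishes a clean segment from an injected fault.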

Example 4 with Holder

Use of org.apache.hadoop.hdfs.util.Holder in the Apache Hadoop project: class TestEditLogFileInputStream, method testReadURL.

@Test
public void testReadURL() throws Exception {
    HttpURLConnection conn = mock(HttpURLConnection.class);
    doReturn(new ByteArrayInputStream(FAKE_LOG_DATA)).when(conn).getInputStream();
    doReturn(HttpURLConnection.HTTP_OK).when(conn).getResponseCode();
    doReturn(Integer.toString(FAKE_LOG_DATA.length)).when(conn).getHeaderField("Content-Length");
    URLConnectionFactory factory = mock(URLConnectionFactory.class);
    doReturn(conn).when(factory).openConnection(Mockito.<URL>any(), anyBoolean());
    URL url = new URL("http://localhost/fakeLog");
    EditLogInputStream elis = EditLogFileInputStream.fromUrl(factory, url, HdfsServerConstants.INVALID_TXID, HdfsServerConstants.INVALID_TXID, false);
    // Read the edit log and verify that we got all of the data.
    EnumMap<FSEditLogOpCodes, Holder<Integer>> counts = FSImageTestUtil.countEditLogOpTypes(elis);
    assertThat(counts.get(FSEditLogOpCodes.OP_ADD).held, is(1));
    assertThat(counts.get(FSEditLogOpCodes.OP_SET_GENSTAMP_V1).held, is(1));
    assertThat(counts.get(FSEditLogOpCodes.OP_CLOSE).held, is(1));
    // Check that length header was picked up.
    assertEquals(FAKE_LOG_DATA.length, elis.length());
    elis.close();
}
Also used: URLConnectionFactory (org.apache.hadoop.hdfs.web.URLConnectionFactory), HttpURLConnection (java.net.HttpURLConnection), ByteArrayInputStream (java.io.ByteArrayInputStream), Holder (org.apache.hadoop.hdfs.util.Holder), URL (java.net.URL), Test (org.junit.Test)

Aggregations

Holder (org.apache.hadoop.hdfs.util.Holder): 4
Test (org.junit.Test): 3
ByteArrayInputStream (java.io.ByteArrayInputStream): 1
File (java.io.File): 1
IOException (java.io.IOException): 1
HttpURLConnection (java.net.HttpURLConnection): 1
URL (java.net.URL): 1
EnumMap (java.util.EnumMap): 1
Random (java.util.Random): 1
Configuration (org.apache.hadoop.conf.Configuration): 1
FSDataOutputStream (org.apache.hadoop.fs.FSDataOutputStream): 1
FileSystem (org.apache.hadoop.fs.FileSystem): 1
Path (org.apache.hadoop.fs.Path): 1
MiniJournalCluster (org.apache.hadoop.hdfs.qjournal.MiniJournalCluster): 1
RollingUpgradeOp (org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.RollingUpgradeOp): 1
FSEditLogOpCodes (org.apache.hadoop.hdfs.server.namenode.FSEditLogOpCodes): 1
StartupProgress (org.apache.hadoop.hdfs.server.namenode.startupprogress.StartupProgress): 1
Counter (org.apache.hadoop.hdfs.server.namenode.startupprogress.StartupProgress.Counter): 1
Step (org.apache.hadoop.hdfs.server.namenode.startupprogress.Step): 1
URLConnectionFactory (org.apache.hadoop.hdfs.web.URLConnectionFactory): 1