
Example 1 with HFileCorruptionChecker

Use of org.apache.hadoop.hbase.util.hbck.HFileCorruptionChecker in project hbase by apache.

From class TestHBaseFsckOneRS, method testQuarantineMissingRegionDir.

/**
   * This creates a table and simulates the race situation where a concurrent compaction or split
   * has removed a region dir before the corruption checker got to it.
   */
@Test(timeout = 180000)
public void testQuarantineMissingRegionDir() throws Exception {
    final TableName tableName = TableName.valueOf(name.getMethodName());
    // Inject a fault into the HFileCorruptionChecker (hfcc) that this hbck creates.
    final FileSystem fs = FileSystem.get(conf);
    HBaseFsck hbck = new HBaseFsck(conf, hbfsckExecutorService) {

        @Override
        public HFileCorruptionChecker createHFileCorruptionChecker(boolean sidelineCorruptHFiles) throws IOException {
            return new HFileCorruptionChecker(conf, executor, sidelineCorruptHFiles) {

                AtomicBoolean attemptedFirstHFile = new AtomicBoolean(false);

                @Override
                protected void checkRegionDir(Path p) throws IOException {
                    if (attemptedFirstHFile.compareAndSet(false, true)) {
                        // make sure delete happened.
                        assertTrue(fs.delete(p, true));
                    }
                    super.checkRegionDir(p);
                }
            };
        }
    };
    doQuarantineTest(tableName, hbck, 3, 0, 0, 0, 1);
    hbck.close();
}
Also used: Path (org.apache.hadoop.fs.Path), TableName (org.apache.hadoop.hbase.TableName), AtomicBoolean (java.util.concurrent.atomic.AtomicBoolean), FileSystem (org.apache.hadoop.fs.FileSystem), HFileCorruptionChecker (org.apache.hadoop.hbase.util.hbck.HFileCorruptionChecker), Test (org.junit.Test)
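
The test above injects a one-shot fault by overriding a protected hook; outside of tests, the checker can also be driven directly. Below is a minimal sketch mirroring the flow HBaseFsck.exec uses in Example 3 (build a checker, point it at table directories, read its counters). The table name and thread-pool size are illustrative assumptions, not part of the examples.

import java.util.ArrayList;
import java.util.Collection;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.util.CommonFSUtils;
import org.apache.hadoop.hbase.util.hbck.HFileCorruptionChecker;

public class CorruptionCheckSketch {
    public static void main(String[] args) throws Exception {
        Configuration conf = HBaseConfiguration.create();
        ExecutorService executor = Executors.newFixedThreadPool(4);
        try {
            // false = report only; true would also sideline (move) corrupt hfiles.
            HFileCorruptionChecker hfcc = new HFileCorruptionChecker(conf, executor, false);
            Path rootDir = CommonFSUtils.getRootDir(conf);
            Collection<Path> tableDirs = new ArrayList<>();
            // "myTable" is a placeholder table name.
            tableDirs.add(CommonFSUtils.getTableDir(rootDir, TableName.valueOf("myTable")));
            hfcc.checkTables(tableDirs);
            System.out.println("hfiles checked: " + hfcc.getHFilesChecked());
            System.out.println("corrupted: " + hfcc.getCorrupted().size());
        } finally {
            executor.shutdown();
        }
    }
}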

Example 2 with HFileCorruptionChecker

Use of org.apache.hadoop.hbase.util.hbck.HFileCorruptionChecker in project hbase by apache.

From class TestHBaseFsckOneRS, method testQuarantineMissingFamdir.

/**
   * This creates a table and simulates the race situation where a concurrent compaction or split
   * has removed a colfam dir before the corruption checker got to it.
   */
// Disabled because it fails sporadically. Is this test right? Timing-wise, there could be no
// files in a column family on initial creation -- as suggested by Matteo.
@Ignore
@Test(timeout = 180000)
public void testQuarantineMissingFamdir() throws Exception {
    final TableName tableName = TableName.valueOf(name.getMethodName());
    // Inject a fault into the HFileCorruptionChecker (hfcc) that this hbck creates.
    final FileSystem fs = FileSystem.get(conf);
    HBaseFsck hbck = new HBaseFsck(conf, hbfsckExecutorService) {

        @Override
        public HFileCorruptionChecker createHFileCorruptionChecker(boolean sidelineCorruptHFiles) throws IOException {
            return new HFileCorruptionChecker(conf, executor, sidelineCorruptHFiles) {

                AtomicBoolean attemptedFirstHFile = new AtomicBoolean(false);

                @Override
                protected void checkColFamDir(Path p) throws IOException {
                    if (attemptedFirstHFile.compareAndSet(false, true)) {
                        // make sure delete happened.
                        assertTrue(fs.delete(p, true));
                    }
                    super.checkColFamDir(p);
                }
            };
        }
    };
    doQuarantineTest(tableName, hbck, 3, 0, 0, 0, 1);
    hbck.close();
}
Also used: Path (org.apache.hadoop.fs.Path), TableName (org.apache.hadoop.hbase.TableName), AtomicBoolean (java.util.concurrent.atomic.AtomicBoolean), FileSystem (org.apache.hadoop.fs.FileSystem), HFileCorruptionChecker (org.apache.hadoop.hbase.util.hbck.HFileCorruptionChecker), Ignore (org.junit.Ignore), Test (org.junit.Test)
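
Both quarantine tests rely on the same one-shot fault-injection idiom: an AtomicBoolean flipped with compareAndSet so the fault fires exactly once, even if several checker threads reach the hook concurrently. A self-contained sketch of just that idiom (names are illustrative):

import java.util.concurrent.atomic.AtomicBoolean;

class OneShotFaultDemo {
    private final AtomicBoolean fired = new AtomicBoolean(false);

    void hook(String item) {
        // Only the first caller wins the compareAndSet and injects the fault.
        if (fired.compareAndSet(false, true)) {
            System.out.println("injecting fault before processing " + item);
        }
        System.out.println("processing " + item);
    }

    public static void main(String[] args) {
        OneShotFaultDemo demo = new OneShotFaultDemo();
        demo.hook("region-1"); // fault fires here
        demo.hook("region-2"); // normal path
    }
}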

Example 3 with HFileCorruptionChecker

Use of org.apache.hadoop.hbase.util.hbck.HFileCorruptionChecker in project hbase by apache.

From class HBaseFsck, method exec.

public HBaseFsck exec(ExecutorService exec, String[] args) throws KeeperException, IOException, InterruptedException, ReplicationException {
    long sleepBeforeRerun = DEFAULT_SLEEP_BEFORE_RERUN;
    boolean checkCorruptHFiles = false;
    boolean sidelineCorruptHFiles = false;
    // Process command-line args.
    for (int i = 0; i < args.length; i++) {
        String cmd = args[i];
        if (cmd.equals("-help") || cmd.equals("-h")) {
            return printUsageAndExit();
        } else if (cmd.equals("-details")) {
            setDisplayFullReport();
        } else if (cmd.equals("-exclusive")) {
            setForceExclusive();
        } else if (cmd.equals("-timelag")) {
            if (i == args.length - 1) {
                errors.reportError(ERROR_CODE.WRONG_USAGE, "HBaseFsck: -timelag needs a value.");
                return printUsageAndExit();
            }
            try {
                long timelag = Long.parseLong(args[++i]);
                setTimeLag(timelag);
            } catch (NumberFormatException e) {
                errors.reportError(ERROR_CODE.WRONG_USAGE, "-timelag needs a numeric value.");
                return printUsageAndExit();
            }
        } else if (cmd.equals("-sleepBeforeRerun")) {
            if (i == args.length - 1) {
                errors.reportError(ERROR_CODE.WRONG_USAGE, "HBaseFsck: -sleepBeforeRerun needs a value.");
                return printUsageAndExit();
            }
            try {
                sleepBeforeRerun = Long.parseLong(args[++i]);
            } catch (NumberFormatException e) {
                errors.reportError(ERROR_CODE.WRONG_USAGE, "-sleepBeforeRerun needs a numeric value.");
                return printUsageAndExit();
            }
        } else if (cmd.equals("-sidelineDir")) {
            if (i == args.length - 1) {
                errors.reportError(ERROR_CODE.WRONG_USAGE, "HBaseFsck: -sidelineDir needs a value.");
                return printUsageAndExit();
            }
            setSidelineDir(args[++i]);
        } else if (cmd.equals("-fix")) {
            errors.reportError(ERROR_CODE.WRONG_USAGE, "This option is deprecated, please use  -fixAssignments instead.");
            setFixAssignments(true);
        } else if (cmd.equals("-fixAssignments")) {
            setFixAssignments(true);
        } else if (cmd.equals("-fixMeta")) {
            setFixMeta(true);
        } else if (cmd.equals("-noHdfsChecking")) {
            setCheckHdfs(false);
        } else if (cmd.equals("-fixHdfsHoles")) {
            setFixHdfsHoles(true);
        } else if (cmd.equals("-fixHdfsOrphans")) {
            setFixHdfsOrphans(true);
        } else if (cmd.equals("-fixTableOrphans")) {
            setFixTableOrphans(true);
        } else if (cmd.equals("-fixHdfsOverlaps")) {
            setFixHdfsOverlaps(true);
        } else if (cmd.equals("-fixVersionFile")) {
            setFixVersionFile(true);
        } else if (cmd.equals("-sidelineBigOverlaps")) {
            setSidelineBigOverlaps(true);
        } else if (cmd.equals("-fixSplitParents")) {
            setFixSplitParents(true);
        } else if (cmd.equals("-removeParents")) {
            setRemoveParents(true);
        } else if (cmd.equals("-ignorePreCheckPermission")) {
            setIgnorePreCheckPermission(true);
        } else if (cmd.equals("-checkCorruptHFiles")) {
            checkCorruptHFiles = true;
        } else if (cmd.equals("-sidelineCorruptHFiles")) {
            sidelineCorruptHFiles = true;
        } else if (cmd.equals("-fixReferenceFiles")) {
            setFixReferenceFiles(true);
        } else if (cmd.equals("-fixHFileLinks")) {
            setFixHFileLinks(true);
        } else if (cmd.equals("-fixEmptyMetaCells")) {
            setFixEmptyMetaCells(true);
        } else if (cmd.equals("-repair")) {
            // this attempts to merge overlapping hdfs regions, needs testing
            // under load
            setFixHdfsHoles(true);
            setFixHdfsOrphans(true);
            setFixMeta(true);
            setFixAssignments(true);
            setFixHdfsOverlaps(true);
            setFixVersionFile(true);
            setSidelineBigOverlaps(true);
            setFixSplitParents(false);
            setCheckHdfs(true);
            setFixReferenceFiles(true);
            setFixHFileLinks(true);
        } else if (cmd.equals("-repairHoles")) {
            // this will make all missing hdfs regions available but may lose data
            setFixHdfsHoles(true);
            setFixHdfsOrphans(false);
            setFixMeta(true);
            setFixAssignments(true);
            setFixHdfsOverlaps(false);
            setSidelineBigOverlaps(false);
            setFixSplitParents(false);
            setCheckHdfs(true);
        } else if (cmd.equals("-maxOverlapsToSideline")) {
            if (i == args.length - 1) {
                errors.reportError(ERROR_CODE.WRONG_USAGE, "-maxOverlapsToSideline needs a numeric value argument.");
                return printUsageAndExit();
            }
            try {
                int maxOverlapsToSideline = Integer.parseInt(args[++i]);
                setMaxOverlapsToSideline(maxOverlapsToSideline);
            } catch (NumberFormatException e) {
                errors.reportError(ERROR_CODE.WRONG_USAGE, "-maxOverlapsToSideline needs a numeric value argument.");
                return printUsageAndExit();
            }
        } else if (cmd.equals("-maxMerge")) {
            if (i == args.length - 1) {
                errors.reportError(ERROR_CODE.WRONG_USAGE, "-maxMerge needs a numeric value argument.");
                return printUsageAndExit();
            }
            try {
                int maxMerge = Integer.parseInt(args[++i]);
                setMaxMerge(maxMerge);
            } catch (NumberFormatException e) {
                errors.reportError(ERROR_CODE.WRONG_USAGE, "-maxMerge needs a numeric value argument.");
                return printUsageAndExit();
            }
        } else if (cmd.equals("-summary")) {
            setSummary();
        } else if (cmd.equals("-metaonly")) {
            setCheckMetaOnly();
        } else if (cmd.equals("-boundaries")) {
            setRegionBoundariesCheck();
        } else if (cmd.equals("-fixReplication")) {
            setFixReplication(true);
        } else if (cmd.equals("-cleanReplicationBarrier")) {
            setCleanReplicationBarrier(true);
            if (args[++i].startsWith("-")) {
                printUsageAndExit();
            }
            setCleanReplicationBarrierTable(args[i]);
        } else if (cmd.startsWith("-")) {
            errors.reportError(ERROR_CODE.WRONG_USAGE, "Unrecognized option:" + cmd);
            return printUsageAndExit();
        } else {
            includeTable(TableName.valueOf(cmd));
            errors.print("Allow checking/fixes for table: " + cmd);
        }
    }
    errors.print("HBaseFsck command line options: " + StringUtils.join(args, " "));
    // Pre-check whether the current user has FS write permission.
    try {
        preCheckPermission();
    } catch (IOException ioe) {
        Runtime.getRuntime().exit(-1);
    }
    // do the real work of hbck
    connect();
    // Check whether any option unsupported by the server version was specified.
    if (!isOptionsSupported(args)) {
        return printUsageAndExit();
    }
    try {
        // If corrupt-hfile mode is on, handle those files first, since they may be opened later.
        if (checkCorruptHFiles || sidelineCorruptHFiles) {
            LOG.info("Checking all hfiles for corruption");
            HFileCorruptionChecker hfcc = createHFileCorruptionChecker(sidelineCorruptHFiles);
            // keep the checker so callers can read its results afterwards
            setHFileCorruptionChecker(hfcc);
            Collection<TableName> tables = getIncludedTables();
            Collection<Path> tableDirs = new ArrayList<>();
            Path rootdir = CommonFSUtils.getRootDir(getConf());
            if (tables.size() > 0) {
                for (TableName t : tables) {
                    tableDirs.add(CommonFSUtils.getTableDir(rootdir, t));
                }
            } else {
                tableDirs = FSUtils.getTableDirs(CommonFSUtils.getCurrentFileSystem(getConf()), rootdir);
            }
            hfcc.checkTables(tableDirs);
            hfcc.report(errors);
        }
        // check and fix table integrity, region consistency.
        int code = onlineHbck();
        setRetCode(code);
    // Rerun once in report-only mode (fixes are disabled below) so a failed fix cannot cause an infinite loop.
        if (shouldRerun()) {
            try {
                LOG.info("Sleeping " + sleepBeforeRerun + "ms before re-checking after fix...");
                Thread.sleep(sleepBeforeRerun);
            } catch (InterruptedException ie) {
                LOG.warn("Interrupted while sleeping");
                return this;
            }
            // Just report
            setFixAssignments(false);
            setFixMeta(false);
            setFixHdfsHoles(false);
            setFixHdfsOverlaps(false);
            setFixVersionFile(false);
            setFixTableOrphans(false);
            errors.resetErrors();
            code = onlineHbck();
            setRetCode(code);
        }
    } finally {
        IOUtils.closeQuietly(this, e -> LOG.warn("", e));
    }
    return this;
}
Also used: Path (org.apache.hadoop.fs.Path), ArrayList (java.util.ArrayList), InterruptedIOException (java.io.InterruptedIOException), IOException (java.io.IOException), TableName (org.apache.hadoop.hbase.TableName), HFileCorruptionChecker (org.apache.hadoop.hbase.util.hbck.HFileCorruptionChecker)
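
Given the flag parsing above, a corruption-only run can be triggered programmatically rather than from the command line. A minimal sketch, assuming a running cluster reachable through the default configuration; the table name and pool size are placeholders. Note that exec() closes the fsck instance in its finally block, so only call read accessors after it returns:

import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.util.HBaseFsck;
import org.apache.hadoop.hbase.util.hbck.HFileCorruptionChecker;

public class QuarantineRunSketch {
    public static void main(String[] args) throws Exception {
        Configuration conf = HBaseConfiguration.create();
        ExecutorService exec = Executors.newFixedThreadPool(8);
        try {
            // -sidelineCorruptHFiles implies the corruption check; "myTable" limits the scan.
            HBaseFsck fsck = new HBaseFsck(conf, exec)
                .exec(exec, new String[] { "-sidelineCorruptHFiles", "myTable" });
            HFileCorruptionChecker hfcc = fsck.getHFilecorruptionChecker();
            System.out.println("ret code: " + fsck.getRetCode());
            System.out.println("quarantined: " + hfcc.getQuarantined().size());
        } finally {
            exec.shutdown();
        }
    }
}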

Example 4 with HFileCorruptionChecker

Use of org.apache.hadoop.hbase.util.hbck.HFileCorruptionChecker in project hbase by apache.

From class TestHBaseFsckMOB, method testQuarantineCorruptMobFile.

/**
 * This creates a table and then corrupts a mob file.  Hbck should quarantine the file.
 */
@SuppressWarnings("deprecation")
@Test
public void testQuarantineCorruptMobFile() throws Exception {
    TableName table = TableName.valueOf(name.getMethodName());
    try {
        setupMobTable(table);
        assertEquals(ROWKEYS.length, countRows());
        admin.flush(table);
        FileSystem fs = FileSystem.get(conf);
        Path mobFile = getFlushedMobFile(fs, table);
        admin.disableTable(table);
        // create new corrupt mob file.
        String corruptMobFile = createMobFileName(mobFile.getName());
        Path corrupt = new Path(mobFile.getParent(), corruptMobFile);
        TestHFile.truncateFile(fs, mobFile, corrupt);
        LOG.info("Created corrupted mob file " + corrupt);
        HBaseFsck.debugLsr(conf, CommonFSUtils.getRootDir(conf));
        HBaseFsck.debugLsr(conf, MobUtils.getMobHome(conf));
        // A corrupt mob file doesn't abort the start of regions, so we can enable the table.
        admin.enableTable(table);
        HBaseFsck res = HbckTestingUtil.doHFileQuarantine(conf, table);
        assertEquals(0, res.getRetCode());
        HFileCorruptionChecker hfcc = res.getHFilecorruptionChecker();
        assertEquals(4, hfcc.getHFilesChecked());
        assertEquals(0, hfcc.getCorrupted().size());
        assertEquals(0, hfcc.getFailures().size());
        assertEquals(0, hfcc.getQuarantined().size());
        assertEquals(0, hfcc.getMissing().size());
        assertEquals(5, hfcc.getMobFilesChecked());
        assertEquals(1, hfcc.getCorruptedMobFiles().size());
        assertEquals(0, hfcc.getFailureMobFiles().size());
        assertEquals(1, hfcc.getQuarantinedMobFiles().size());
        assertEquals(0, hfcc.getMissedMobFiles().size());
        String quarantinedMobFile = hfcc.getQuarantinedMobFiles().iterator().next().getName();
        assertEquals(corruptMobFile, quarantinedMobFile);
    } finally {
        cleanupTable(table);
    }
}
Also used: Path (org.apache.hadoop.fs.Path), TableName (org.apache.hadoop.hbase.TableName), FileSystem (org.apache.hadoop.fs.FileSystem), HFileCorruptionChecker (org.apache.hadoop.hbase.util.hbck.HFileCorruptionChecker), Test (org.junit.Test)
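
HbckTestingUtil.doHFileQuarantine is not shown on this page. Based on the flag handling in Example 3, a plausible reconstruction (explicitly a guess, not necessarily the project's actual helper) wraps exec() with -sidelineCorruptHFiles and -ignorePreCheckPermission against a single table:

import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.util.HBaseFsck;

public final class HbckTestingUtilSketch {
    private static final ExecutorService exec = Executors.newFixedThreadPool(8);

    // Runs hbck in sideline mode against one table and returns the fsck instance
    // so tests can inspect getRetCode() and getHFilecorruptionChecker().
    public static HBaseFsck doHFileQuarantine(Configuration conf, TableName table)
            throws Exception {
        String[] args = { "-sidelineCorruptHFiles", "-ignorePreCheckPermission",
            table.getNameAsString() };
        HBaseFsck hbck = new HBaseFsck(conf, exec);
        hbck.exec(exec, args);
        return hbck;
    }
}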

Example 5 with HFileCorruptionChecker

Use of org.apache.hadoop.hbase.util.hbck.HFileCorruptionChecker in project hbase by apache.

From class TestHBaseFsckEncryption, method testFsckWithEncryption.

@Test
public void testFsckWithEncryption() throws Exception {
    // Populate the table with some data
    Table table = TEST_UTIL.getConnection().getTable(tableDescriptor.getTableName());
    try {
        byte[] values = { 'A', 'B', 'C', 'D' };
        for (int i = 0; i < values.length; i++) {
            for (int j = 0; j < values.length; j++) {
                Put put = new Put(new byte[] { values[i], values[j] });
                put.addColumn(Bytes.toBytes("cf"), new byte[] {}, new byte[] { values[i], values[j] });
                table.put(put);
            }
        }
    } finally {
        table.close();
    }
    // Flush it
    TEST_UTIL.getAdmin().flush(tableDescriptor.getTableName());
    // Verify we have encrypted store files on disk
    final List<Path> paths = findStorefilePaths(tableDescriptor.getTableName());
    assertTrue(paths.size() > 0);
    for (Path path : paths) {
        assertTrue("Store file " + path + " has incorrect key", Bytes.equals(cfKey.getEncoded(), extractHFileKey(path)));
    }
    // Ensure hbck doesn't consider them corrupt
    HBaseFsck res = HbckTestingUtil.doHFileQuarantine(conf, tableDescriptor.getTableName());
    assertEquals(0, res.getRetCode());
    HFileCorruptionChecker hfcc = res.getHFilecorruptionChecker();
    assertEquals(0, hfcc.getCorrupted().size());
    assertEquals(0, hfcc.getFailures().size());
    assertEquals(0, hfcc.getQuarantined().size());
    assertEquals(0, hfcc.getMissing().size());
}
Also used: Path (org.apache.hadoop.fs.Path), Table (org.apache.hadoop.hbase.client.Table), HFileCorruptionChecker (org.apache.hadoop.hbase.util.hbck.HFileCorruptionChecker), Put (org.apache.hadoop.hbase.client.Put), Test (org.junit.Test)
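
findStorefilePaths and extractHFileKey are helpers of the test class and are not shown on this page. A plausible sketch of extractHFileKey, assuming the HBase 2.x HFile reader API: open the store file, pull the encryption context from its file context, and return the raw key bytes.

import java.security.Key;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.io.crypto.Encryption;
import org.apache.hadoop.hbase.io.hfile.CacheConfig;
import org.apache.hadoop.hbase.io.hfile.HFile;

final class HFileKeySketch {
    static byte[] extractHFileKey(Configuration conf, FileSystem fs, Path path)
            throws Exception {
        // true = open as the primary replica reader.
        HFile.Reader reader =
            HFile.createReader(fs, path, new CacheConfig(conf), true, conf);
        try {
            Encryption.Context cryptoContext = reader.getFileContext().getEncryptionContext();
            if (cryptoContext == null) {
                return null; // file is not encrypted
            }
            Key key = cryptoContext.getKey();
            return key == null ? null : key.getEncoded();
        } finally {
            reader.close();
        }
    }
}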

Aggregations

HFileCorruptionChecker (org.apache.hadoop.hbase.util.hbck.HFileCorruptionChecker): 9 usages
Path (org.apache.hadoop.fs.Path): 8 usages
FileSystem (org.apache.hadoop.fs.FileSystem): 6 usages
TableName (org.apache.hadoop.hbase.TableName): 6 usages
Test (org.junit.Test): 6 usages
AtomicBoolean (java.util.concurrent.atomic.AtomicBoolean): 3 usages
IOException (java.io.IOException): 1 usage
InterruptedIOException (java.io.InterruptedIOException): 1 usage
ArrayList (java.util.ArrayList): 1 usage
ExecutorService (java.util.concurrent.ExecutorService): 1 usage
Put (org.apache.hadoop.hbase.client.Put): 1 usage
Table (org.apache.hadoop.hbase.client.Table): 1 usage
Ignore (org.junit.Ignore): 1 usage