
Example 16 with RegionLocator

Use of org.apache.hadoop.hbase.client.RegionLocator in project hbase by apache.

From the class TestHFileOutputFormat2, method testExcludeAllFromMinorCompaction.

/**
   * This test covers the scenario reported in HBASE-6901:
   * all files are bulk loaded and excluded from minor compaction.
   * Without the fix for HBASE-6901, an ArrayIndexOutOfBoundsException
   * is thrown.
   */
@Ignore("Flakey: See HBASE-9051")
@Test
public void testExcludeAllFromMinorCompaction() throws Exception {
    Configuration conf = util.getConfiguration();
    conf.setInt("hbase.hstore.compaction.min", 2);
    generateRandomStartKeys(5);
    util.startMiniCluster();
    try (Connection conn = ConnectionFactory.createConnection();
        Admin admin = conn.getAdmin();
        Table table = util.createTable(TABLE_NAME, FAMILIES);
        RegionLocator locator = conn.getRegionLocator(TABLE_NAME)) {
        final FileSystem fs = util.getDFSCluster().getFileSystem();
        assertEquals("Should start with empty table", 0, util.countRows(table));
        // deep inspection: get the StoreFile dir
        final Path storePath = new Path(FSUtils.getTableDir(FSUtils.getRootDir(conf), TABLE_NAME), new Path(admin.getTableRegions(TABLE_NAME).get(0).getEncodedName(), Bytes.toString(FAMILIES[0])));
        assertEquals(0, fs.listStatus(storePath).length);
        // Generate two bulk load files
        conf.setBoolean("hbase.mapreduce.hfileoutputformat.compaction.exclude", true);
        for (int i = 0; i < 2; i++) {
            Path testDir = util.getDataTestDirOnTestFS("testExcludeAllFromMinorCompaction_" + i);
            runIncrementalPELoad(conf, table.getTableDescriptor(), conn.getRegionLocator(TABLE_NAME), testDir, false);
            // Perform the actual load
            new LoadIncrementalHFiles(conf).doBulkLoad(testDir, admin, table, locator);
        }
        // Ensure data shows up
        int expectedRows = 2 * NMapInputFormat.getNumMapTasks(conf) * ROWSPERSPLIT;
        assertEquals("LoadIncrementalHFiles should put expected data in table", expectedRows, util.countRows(table));
        // should have a second StoreFile now
        assertEquals(2, fs.listStatus(storePath).length);
        // minor compactions shouldn't get rid of the file
        admin.compact(TABLE_NAME);
        try {
            quickPoll(new Callable<Boolean>() {

                @Override
                public Boolean call() throws Exception {
                    List<HRegion> regions = util.getMiniHBaseCluster().getRegions(TABLE_NAME);
                    for (HRegion region : regions) {
                        for (Store store : region.getStores()) {
                            store.closeAndArchiveCompactedFiles();
                        }
                    }
                    return fs.listStatus(storePath).length == 1;
                }
            }, 5000);
            throw new IOException("SF# = " + fs.listStatus(storePath).length);
        } catch (AssertionError ae) {
            // this is expected behavior
        }
        // a major compaction should work though
        admin.majorCompact(TABLE_NAME);
        quickPoll(new Callable<Boolean>() {

            @Override
            public Boolean call() throws Exception {
                List<HRegion> regions = util.getMiniHBaseCluster().getRegions(TABLE_NAME);
                for (HRegion region : regions) {
                    for (Store store : region.getStores()) {
                        store.closeAndArchiveCompactedFiles();
                    }
                }
                return fs.listStatus(storePath).length == 1;
            }
        }, 5000);
    } finally {
        util.shutdownMiniCluster();
    }
}
Also used : Path(org.apache.hadoop.fs.Path) RegionLocator(org.apache.hadoop.hbase.client.RegionLocator) Table(org.apache.hadoop.hbase.client.Table) Configuration(org.apache.hadoop.conf.Configuration) HBaseConfiguration(org.apache.hadoop.hbase.HBaseConfiguration) Connection(org.apache.hadoop.hbase.client.Connection) Store(org.apache.hadoop.hbase.regionserver.Store) IOException(java.io.IOException) Admin(org.apache.hadoop.hbase.client.Admin) UnsupportedEncodingException(java.io.UnsupportedEncodingException) IOException(java.io.IOException) HRegion(org.apache.hadoop.hbase.regionserver.HRegion) FileSystem(org.apache.hadoop.fs.FileSystem) DistributedFileSystem(org.apache.hadoop.hdfs.DistributedFileSystem) List(java.util.List) ArrayList(java.util.ArrayList) Ignore(org.junit.Ignore) Test(org.junit.Test)
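
A minimal, illustrative sketch of the pattern this test exercises, stripped of the mini-cluster scaffolding (the table name and HFile directory below are placeholders, not values from the test): set the same compaction-exclude flag and push the HFileOutputFormat2 output into the table through the doBulkLoad overload used above.

Configuration conf = HBaseConfiguration.create();
// Ask HFileOutputFormat2 to tag written HFiles as excluded from minor compaction
conf.setBoolean("hbase.mapreduce.hfileoutputformat.compaction.exclude", true);
TableName tableName = TableName.valueOf("demo_table");   // placeholder
Path hfileDir = new Path("/tmp/demo_hfiles");            // HFileOutputFormat2 output dir, placeholder
try (Connection conn = ConnectionFactory.createConnection(conf);
    Admin admin = conn.getAdmin();
    Table table = conn.getTable(tableName);
    RegionLocator locator = conn.getRegionLocator(tableName)) {
    // Bulk load the generated files; a minor compaction should leave them alone,
    // while a major compaction will still rewrite them.
    new LoadIncrementalHFiles(conf).doBulkLoad(hfileDir, admin, table, locator);
}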

Example 17 with RegionLocator

Use of org.apache.hadoop.hbase.client.RegionLocator in project hbase by apache.

From the class TestHFileOutputFormat2, method testColumnFamilySettings.

/**
   * Test that {@link HFileOutputFormat2} RecordWriter uses compression and
   * bloom filter settings from the column family descriptor
   */
@Ignore("Goes zombie too frequently; needs work. See HBASE-14563")
@Test
public void testColumnFamilySettings() throws Exception {
    Configuration conf = new Configuration(this.util.getConfiguration());
    RecordWriter<ImmutableBytesWritable, Cell> writer = null;
    TaskAttemptContext context = null;
    Path dir = util.getDataTestDir("testColumnFamilySettings");
    // Setup table descriptor
    Table table = Mockito.mock(Table.class);
    RegionLocator regionLocator = Mockito.mock(RegionLocator.class);
    HTableDescriptor htd = new HTableDescriptor(TABLE_NAME);
    Mockito.doReturn(htd).when(table).getTableDescriptor();
    for (HColumnDescriptor hcd : HBaseTestingUtility.generateColumnDescriptors()) {
        htd.addFamily(hcd);
    }
    // set up the table to return some mock keys
    setupMockStartKeys(regionLocator);
    try {
        // partial map red setup to get an operational writer for testing
        // We turn off the sequence file compression, because DefaultCodec
        // pollutes the GZip codec pool with an incompatible compressor.
        conf.set("io.seqfile.compression.type", "NONE");
        conf.set("hbase.fs.tmp.dir", dir.toString());
        // turn locality off to eliminate getRegionLocation fail-and-retry time when writing kvs
        conf.setBoolean(HFileOutputFormat2.LOCALITY_SENSITIVE_CONF_KEY, false);
        Job job = new Job(conf, "testLocalMRIncrementalLoad");
        job.setWorkingDirectory(util.getDataTestDirOnTestFS("testColumnFamilySettings"));
        setupRandomGeneratorMapper(job, false);
        HFileOutputFormat2.configureIncrementalLoad(job, table.getTableDescriptor(), regionLocator);
        FileOutputFormat.setOutputPath(job, dir);
        context = createTestTaskAttemptContext(job);
        HFileOutputFormat2 hof = new HFileOutputFormat2();
        writer = hof.getRecordWriter(context);
        // write out random rows
        writeRandomKeyValues(writer, context, htd.getFamiliesKeys(), ROWSPERSPLIT);
        writer.close(context);
        // Make sure that a directory was created for every CF
        FileSystem fs = dir.getFileSystem(conf);
        // commit so that the filesystem has one directory per column family
        hof.getOutputCommitter(context).commitTask(context);
        hof.getOutputCommitter(context).commitJob(context);
        FileStatus[] families = FSUtils.listStatus(fs, dir, new FSUtils.FamilyDirFilter(fs));
        assertEquals(htd.getFamilies().size(), families.length);
        for (FileStatus f : families) {
            String familyStr = f.getPath().getName();
            HColumnDescriptor hcd = htd.getFamily(Bytes.toBytes(familyStr));
            // verify that the compression on this file matches the configured
            // compression
            Path dataFilePath = fs.listStatus(f.getPath())[0].getPath();
            Reader reader = HFile.createReader(fs, dataFilePath, new CacheConfig(conf), conf);
            Map<byte[], byte[]> fileInfo = reader.loadFileInfo();
            byte[] bloomFilter = fileInfo.get(StoreFile.BLOOM_FILTER_TYPE_KEY);
            if (bloomFilter == null) {
                bloomFilter = Bytes.toBytes("NONE");
            }
            assertEquals("Incorrect bloom filter used for column family " + familyStr + "(reader: " + reader + ")", hcd.getBloomFilterType(), BloomType.valueOf(Bytes.toString(bloomFilter)));
            assertEquals("Incorrect compression used for column family " + familyStr + "(reader: " + reader + ")", hcd.getCompressionType(), reader.getFileContext().getCompression());
        }
    } finally {
        dir.getFileSystem(conf).delete(dir, true);
    }
}
Also used : Path(org.apache.hadoop.fs.Path) RegionLocator(org.apache.hadoop.hbase.client.RegionLocator) ImmutableBytesWritable(org.apache.hadoop.hbase.io.ImmutableBytesWritable) Table(org.apache.hadoop.hbase.client.Table) FileStatus(org.apache.hadoop.fs.FileStatus) LocatedFileStatus(org.apache.hadoop.fs.LocatedFileStatus) HdfsFileStatus(org.apache.hadoop.hdfs.protocol.HdfsFileStatus) Configuration(org.apache.hadoop.conf.Configuration) HBaseConfiguration(org.apache.hadoop.hbase.HBaseConfiguration) HColumnDescriptor(org.apache.hadoop.hbase.HColumnDescriptor) Reader(org.apache.hadoop.hbase.io.hfile.HFile.Reader) TaskAttemptContext(org.apache.hadoop.mapreduce.TaskAttemptContext) HTableDescriptor(org.apache.hadoop.hbase.HTableDescriptor) FileSystem(org.apache.hadoop.fs.FileSystem) DistributedFileSystem(org.apache.hadoop.hdfs.DistributedFileSystem) Job(org.apache.hadoop.mapreduce.Job) Cell(org.apache.hadoop.hbase.Cell) CacheConfig(org.apache.hadoop.hbase.io.hfile.CacheConfig) FSUtils(org.apache.hadoop.hbase.util.FSUtils) Ignore(org.junit.Ignore) Test(org.junit.Test)
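
The settings this test verifies are ordinary column family attributes. A minimal sketch of declaring them (the family name and algorithms below are chosen only for illustration) with the same HColumnDescriptor accessors the test reads back via hcd.getCompressionType() and hcd.getBloomFilterType():

HTableDescriptor htd = new HTableDescriptor(TableName.valueOf("demo_table")); // placeholder name
HColumnDescriptor hcd = new HColumnDescriptor("cf1");                         // placeholder family
hcd.setCompressionType(Compression.Algorithm.GZ);  // honoured by the HFileOutputFormat2 writer
hcd.setBloomFilterType(BloomType.ROW);             // recorded in the HFile's file info
htd.addFamily(hcd);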

Example 18 with RegionLocator

Use of org.apache.hadoop.hbase.client.RegionLocator in project hbase by apache.

From the class TestHFileOutputFormat2, method doIncrementalLoadTest.

private void doIncrementalLoadTest(boolean shouldChangeRegions, boolean shouldKeepLocality, boolean putSortReducer, String tableStr) throws Exception {
    util = new HBaseTestingUtility();
    Configuration conf = util.getConfiguration();
    conf.setBoolean(HFileOutputFormat2.LOCALITY_SENSITIVE_CONF_KEY, shouldKeepLocality);
    int hostCount = 1;
    int regionNum = 5;
    if (shouldKeepLocality) {
        // The host count should be raised above the HDFS replica count once MiniHBaseCluster
        // supports an explicit hostnames parameter, just like MiniDFSCluster does.
        hostCount = 3;
        regionNum = 20;
    }
    byte[][] splitKeys = generateRandomSplitKeys(regionNum - 1);
    String[] hostnames = new String[hostCount];
    for (int i = 0; i < hostCount; ++i) {
        hostnames[i] = "datanode_" + i;
    }
    util.startMiniCluster(1, hostCount, hostnames);
    TableName tableName = TableName.valueOf(tableStr);
    Table table = util.createTable(tableName, FAMILIES, splitKeys);
    Path testDir = util.getDataTestDirOnTestFS("testLocalMRIncrementalLoad");
    FileSystem fs = testDir.getFileSystem(conf);
    try (RegionLocator r = util.getConnection().getRegionLocator(tableName);
        Admin admin = util.getConnection().getAdmin()) {
        assertEquals("Should start with empty table", 0, util.countRows(table));
        int numRegions = r.getStartKeys().length;
        assertEquals("Should make " + regionNum + " regions", numRegions, regionNum);
        // Generate the bulk load files
        runIncrementalPELoad(conf, table.getTableDescriptor(), r, testDir, putSortReducer);
        // This doesn't write into the table, just makes files
        assertEquals("HFOF should not touch actual table", 0, util.countRows(table));
        // Make sure that a directory was created for every CF
        int dir = 0;
        for (FileStatus f : testDir.getFileSystem(conf).listStatus(testDir)) {
            for (byte[] family : FAMILIES) {
                if (Bytes.toString(family).equals(f.getPath().getName())) {
                    ++dir;
                }
            }
        }
        assertEquals("Column family not found in FS.", FAMILIES.length, dir);
        // handle the split case
        if (shouldChangeRegions) {
            LOG.info("Changing regions in table");
            admin.disableTable(table.getName());
            util.waitUntilNoRegionsInTransition();
            util.deleteTable(table.getName());
            byte[][] newSplitKeys = generateRandomSplitKeys(14);
            table = util.createTable(tableName, FAMILIES, newSplitKeys);
            while (util.getConnection().getRegionLocator(tableName).getAllRegionLocations().size() != 15 || !admin.isTableAvailable(table.getName())) {
                Thread.sleep(200);
                LOG.info("Waiting for new region assignment to happen");
            }
        }
        // Perform the actual load
        new LoadIncrementalHFiles(conf).doBulkLoad(testDir, admin, table, r);
        // Ensure data shows up
        int expectedRows = 0;
        if (putSortReducer) {
            // no rows should be extracted
            assertEquals("LoadIncrementalHFiles should put expected data in table", expectedRows, util.countRows(table));
        } else {
            expectedRows = NMapInputFormat.getNumMapTasks(conf) * ROWSPERSPLIT;
            assertEquals("LoadIncrementalHFiles should put expected data in table", expectedRows, util.countRows(table));
            Scan scan = new Scan();
            ResultScanner results = table.getScanner(scan);
            for (Result res : results) {
                assertEquals(FAMILIES.length, res.rawCells().length);
                Cell first = res.rawCells()[0];
                for (Cell kv : res.rawCells()) {
                    assertTrue(CellUtil.matchingRow(first, kv));
                    assertTrue(Bytes.equals(CellUtil.cloneValue(first), CellUtil.cloneValue(kv)));
                }
            }
            results.close();
        }
        String tableDigestBefore = util.checksumRows(table);
        // Check region locality
        HDFSBlocksDistribution hbd = new HDFSBlocksDistribution();
        for (HRegion region : util.getHBaseCluster().getRegions(tableName)) {
            hbd.add(region.getHDFSBlocksDistribution());
        }
        for (String hostname : hostnames) {
            float locality = hbd.getBlockLocalityIndex(hostname);
            LOG.info("locality of [" + hostname + "]: " + locality);
            assertEquals(100, (int) (locality * 100));
        }
        // Cause regions to reopen
        admin.disableTable(tableName);
        while (!admin.isTableDisabled(tableName)) {
            Thread.sleep(200);
            LOG.info("Waiting for table to disable");
        }
        admin.enableTable(tableName);
        util.waitTableAvailable(tableName);
        assertEquals("Data should remain after reopening of regions", tableDigestBefore, util.checksumRows(table));
    } finally {
        testDir.getFileSystem(conf).delete(testDir, true);
        util.deleteTable(tableName);
        util.shutdownMiniCluster();
    }
}
Also used : Path(org.apache.hadoop.fs.Path) RegionLocator(org.apache.hadoop.hbase.client.RegionLocator) Table(org.apache.hadoop.hbase.client.Table) FileStatus(org.apache.hadoop.fs.FileStatus) LocatedFileStatus(org.apache.hadoop.fs.LocatedFileStatus) HdfsFileStatus(org.apache.hadoop.hdfs.protocol.HdfsFileStatus) ResultScanner(org.apache.hadoop.hbase.client.ResultScanner) Configuration(org.apache.hadoop.conf.Configuration) HBaseConfiguration(org.apache.hadoop.hbase.HBaseConfiguration) Admin(org.apache.hadoop.hbase.client.Admin) HDFSBlocksDistribution(org.apache.hadoop.hbase.HDFSBlocksDistribution) Result(org.apache.hadoop.hbase.client.Result) TableName(org.apache.hadoop.hbase.TableName) HRegion(org.apache.hadoop.hbase.regionserver.HRegion) HBaseTestingUtility(org.apache.hadoop.hbase.HBaseTestingUtility) FileSystem(org.apache.hadoop.fs.FileSystem) DistributedFileSystem(org.apache.hadoop.hdfs.DistributedFileSystem) Scan(org.apache.hadoop.hbase.client.Scan) Cell(org.apache.hadoop.hbase.Cell)
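
For reference, the two RegionLocator accessors this test leans on for region counting and split handling can be shown in isolation. A short sketch with a placeholder table name, assuming a Configuration conf like the one above:

try (Connection conn = ConnectionFactory.createConnection(conf);
    RegionLocator locator = conn.getRegionLocator(TableName.valueOf("demo_table"))) {
    byte[][] startKeys = locator.getStartKeys();   // one entry per region
    System.out.println("regions: " + startKeys.length);
    for (HRegionLocation loc : locator.getAllRegionLocations()) {
        // Each location pairs a region with the server currently hosting it
        System.out.println(loc.getRegionInfo().getRegionNameAsString() + " on " + loc.getServerName());
    }
}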

Example 19 with RegionLocator

Use of org.apache.hadoop.hbase.client.RegionLocator in project hbase by apache.

From the class TestHFileOutputFormat2, method testExcludeMinorCompaction.

@Ignore("Goes zombie too frequently; needs work. See HBASE-14563")
@Test
public void testExcludeMinorCompaction() throws Exception {
    Configuration conf = util.getConfiguration();
    conf.setInt("hbase.hstore.compaction.min", 2);
    generateRandomStartKeys(5);
    util.startMiniCluster();
    try (Connection conn = ConnectionFactory.createConnection(conf);
        Admin admin = conn.getAdmin()) {
        Path testDir = util.getDataTestDirOnTestFS("testExcludeMinorCompaction");
        final FileSystem fs = util.getDFSCluster().getFileSystem();
        Table table = util.createTable(TABLE_NAME, FAMILIES);
        assertEquals("Should start with empty table", 0, util.countRows(table));
        // deep inspection: get the StoreFile dir
        final Path storePath = new Path(FSUtils.getTableDir(FSUtils.getRootDir(conf), TABLE_NAME), new Path(admin.getTableRegions(TABLE_NAME).get(0).getEncodedName(), Bytes.toString(FAMILIES[0])));
        assertEquals(0, fs.listStatus(storePath).length);
        // put some data in it and flush to create a storefile
        Put p = new Put(Bytes.toBytes("test"));
        p.addColumn(FAMILIES[0], Bytes.toBytes("1"), Bytes.toBytes("1"));
        table.put(p);
        admin.flush(TABLE_NAME);
        assertEquals(1, util.countRows(table));
        quickPoll(new Callable<Boolean>() {

            @Override
            public Boolean call() throws Exception {
                return fs.listStatus(storePath).length == 1;
            }
        }, 5000);
        // Generate a bulk load file with more rows
        conf.setBoolean("hbase.mapreduce.hfileoutputformat.compaction.exclude", true);
        RegionLocator regionLocator = conn.getRegionLocator(TABLE_NAME);
        runIncrementalPELoad(conf, table.getTableDescriptor(), regionLocator, testDir, false);
        // Perform the actual load
        new LoadIncrementalHFiles(conf).doBulkLoad(testDir, admin, table, regionLocator);
        // Ensure data shows up
        int expectedRows = NMapInputFormat.getNumMapTasks(conf) * ROWSPERSPLIT;
        assertEquals("LoadIncrementalHFiles should put expected data in table", expectedRows + 1, util.countRows(table));
        // should have a second StoreFile now
        assertEquals(2, fs.listStatus(storePath).length);
        // minor compactions shouldn't get rid of the file
        admin.compact(TABLE_NAME);
        try {
            quickPoll(new Callable<Boolean>() {

                @Override
                public Boolean call() throws Exception {
                    return fs.listStatus(storePath).length == 1;
                }
            }, 5000);
            throw new IOException("SF# = " + fs.listStatus(storePath).length);
        } catch (AssertionError ae) {
            // this is expected behavior
        }
        // a major compaction should work though
        admin.majorCompact(TABLE_NAME);
        quickPoll(new Callable<Boolean>() {

            @Override
            public Boolean call() throws Exception {
                return fs.listStatus(storePath).length == 1;
            }
        }, 5000);
    } finally {
        util.shutdownMiniCluster();
    }
}
Also used : Path(org.apache.hadoop.fs.Path) RegionLocator(org.apache.hadoop.hbase.client.RegionLocator) Table(org.apache.hadoop.hbase.client.Table) Configuration(org.apache.hadoop.conf.Configuration) HBaseConfiguration(org.apache.hadoop.hbase.HBaseConfiguration) Connection(org.apache.hadoop.hbase.client.Connection) IOException(java.io.IOException) Admin(org.apache.hadoop.hbase.client.Admin) Put(org.apache.hadoop.hbase.client.Put) UnsupportedEncodingException(java.io.UnsupportedEncodingException) IOException(java.io.IOException) FileSystem(org.apache.hadoop.fs.FileSystem) DistributedFileSystem(org.apache.hadoop.hdfs.DistributedFileSystem) Ignore(org.junit.Ignore) Test(org.junit.Test)
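
The minor-versus-major distinction this test depends on boils down to two asynchronous Admin requests. A hedged sketch, assuming table, admin and tableName handles like the ones opened above (row, family and qualifier are placeholders):

Put p = new Put(Bytes.toBytes("row1"));
p.addColumn(Bytes.toBytes("cf1"), Bytes.toBytes("q"), Bytes.toBytes("v"));
table.put(p);
admin.flush(tableName);        // flush the memstore so the store has an on-disk file
admin.compact(tableName);      // minor compaction request: excluded bulk-load files are skipped
admin.majorCompact(tableName); // major compaction request: all files, excluded or not, are rewritten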

Example 20 with RegionLocator

Use of org.apache.hadoop.hbase.client.RegionLocator in project hbase by apache.

From the class TestLoadIncrementalHFilesSplitRecovery, method populateTable.

/**
   * Populate table with known values.
   */
private void populateTable(final Connection connection, TableName table, int value) throws Exception {
    // create HFiles for different column families
    LoadIncrementalHFiles lih = new LoadIncrementalHFiles(util.getConfiguration());
    Path bulk1 = buildBulkFiles(table, value);
    try (Table t = connection.getTable(table);
        RegionLocator locator = connection.getRegionLocator(table);
        Admin admin = connection.getAdmin()) {
        lih.doBulkLoad(bulk1, admin, t, locator);
    }
}
Also used : Path(org.apache.hadoop.fs.Path) RegionLocator(org.apache.hadoop.hbase.client.RegionLocator) Table(org.apache.hadoop.hbase.client.Table) Admin(org.apache.hadoop.hbase.client.Admin)
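
doBulkLoad expects the staging directory produced by buildBulkFiles (a helper in this test class) to follow the usual bulk-load layout: one subdirectory per column family, each containing the HFiles to load. A small, illustrative sanity check of that layout, reusing the bulk1 path and util from the method above:

FileSystem fs = bulk1.getFileSystem(util.getConfiguration());
for (FileStatus familyDir : fs.listStatus(bulk1)) {
    // Each entry should be a family directory holding one or more HFiles
    System.out.println("family dir: " + familyDir.getPath().getName()
        + ", hfiles: " + fs.listStatus(familyDir.getPath()).length);
}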

Aggregations

RegionLocator (org.apache.hadoop.hbase.client.RegionLocator): 84 uses
Table (org.apache.hadoop.hbase.client.Table): 59 uses
Test (org.junit.Test): 49 uses
TableName (org.apache.hadoop.hbase.TableName): 39 uses
Admin (org.apache.hadoop.hbase.client.Admin): 33 uses
Path (org.apache.hadoop.fs.Path): 31 uses
HRegionLocation (org.apache.hadoop.hbase.HRegionLocation): 30 uses
HRegionInfo (org.apache.hadoop.hbase.HRegionInfo): 29 uses
Connection (org.apache.hadoop.hbase.client.Connection): 25 uses
Configuration (org.apache.hadoop.conf.Configuration): 21 uses
IOException (java.io.IOException): 19 uses
HTableDescriptor (org.apache.hadoop.hbase.HTableDescriptor): 15 uses
FileSystem (org.apache.hadoop.fs.FileSystem): 14 uses
HBaseConfiguration (org.apache.hadoop.hbase.HBaseConfiguration): 13 uses
ServerName (org.apache.hadoop.hbase.ServerName): 13 uses
HColumnDescriptor (org.apache.hadoop.hbase.HColumnDescriptor): 12 uses
ClusterConnection (org.apache.hadoop.hbase.client.ClusterConnection): 10 uses
Put (org.apache.hadoop.hbase.client.Put): 10 uses
ArrayList (java.util.ArrayList): 9 uses
Result (org.apache.hadoop.hbase.client.Result): 8 uses